]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
cloudinit: allow non-root users to set ciupgrade option
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
baa4f62d 1package PVE::QemuServer;
1e3baf05
DM
2
3use strict;
990fc5e2 4use warnings;
3ff84d6f 5
5da072fb
TL
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
1e3baf05 10use File::Basename;
5da072fb 11use File::Copy qw(copy);
1e3baf05
DM
12use File::Path;
13use File::stat;
14use Getopt::Long;
5da072fb
TL
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
1e3baf05 20use IPC::Open3;
c971c4f2 21use JSON;
c3d15108 22use List::Util qw(first);
1f30ac3a 23use MIME::Base64;
5da072fb
TL
24use POSIX;
25use Storable qw(dclone);
f85951dc 26use Time::HiRes qw(gettimeofday usleep);
5da072fb 27use URI::Escape;
425441e6 28use UUID;
5da072fb 29
82841214 30use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
5b65b00d 31use PVE::CGroup;
83870398 32use PVE::CpuSet;
48cf040f 33use PVE::DataCenterConfig;
5da072fb 34use PVE::Exception qw(raise raise_param_exc);
3b56383b 35use PVE::Format qw(render_duration render_bytes);
81d6e4e1 36use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
a52eb3c4
DC
37use PVE::Mapping::PCI;
38use PVE::Mapping::USB;
1e3baf05 39use PVE::INotify;
4df98f2f 40use PVE::JSONSchema qw(get_standard_option parse_property_string);
1e3baf05 41use PVE::ProcFSTools;
fbec3f89 42use PVE::PBSClient;
34e82fa2 43use PVE::RESTEnvironment qw(log_warn);
91bd6c90 44use PVE::RPCEnvironment;
5da072fb 45use PVE::Storage;
b71351a7 46use PVE::SysFSTools;
d04d6af1 47use PVE::Systemd;
82841214 48use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
5da072fb
TL
49
50use PVE::QMPClient;
51use PVE::QemuConfig;
238af88e 52use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
5da072fb 53use PVE::QemuServer::Cloudinit;
5b65b00d 54use PVE::QemuServer::CGroup;
d786a274 55use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
75748d44 56use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
2ea5fb7e 57use PVE::QemuServer::Machine;
5da072fb 58use PVE::QemuServer::Memory;
0a13e08e 59use PVE::QemuServer::Monitor qw(mon_cmd);
74c17b7a 60use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
0cf8d56c 61use PVE::QemuServer::USB;
1e3baf05 62
28e129cc
AD
# Optional SDN support: $have_sdn becomes 1 when PVE::Network::SDN::Zones can
# be loaded and stays undef otherwise (a failing require is silently ignored).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
68
# Directory that holds the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware lookup table: architecture => variant => [ code image, vars image ].
# Every path is built as "$EDK2_FW_BASE/<file>"; because the base already ends
# in '/', the resulting strings contain a double slash.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.fd)
        ],
        '4m-no-smm-ms' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.ms.fd)
        ],
        '4m' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.fd)
        ],
        '4m-ms' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.ms.fd)
        ],
        default => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE.fd OVMF_VARS.fd)
        ],
    },
    aarch64 => {
        default => [
            map { "$EDK2_FW_BASE/$_" } qw(AAVMF_CODE.fd AAVMF_VARS.fd)
        ],
    },
};
2ddc0a5c 100
7f0b5beb 101my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
1e3baf05 102
8d88a594
TL
103# Note about locking: we use flock on the config file to protect against concurrent actions.
104# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
105# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such lock is set.
106# But you can ignore this kind of lock with the --skiplock flag.
1e3baf05 107
cf364f95
TL
# Register the parse and write callbacks for '/qemu-server/' through
# cfs_register_file (imported from PVE::Cluster).
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
1e3baf05 113
3ea94c60
DM
# Standard option 'pve-qm-stateuri': an optional string of at most 128 characters.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        maxLength => 128,
        optional => 1,
        description => "Some command save/restore state from this location.",
    },
);
120
# Standard option 'pve-qemu-machine': an optional machine type string of at
# most 40 characters that has to match the pattern below.
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        optional => 1,
        maxLength => 40,
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
        description => "Specifies the QEMU machine type.",
    },
);
128
1a67f999 129# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
38277afc
TL
my $nodename_cache;

# Returns the node name reported by PVE::INotify::nodename(). The value is kept
# in $nodename_cache, so PVE::INotify is only asked again while the cached
# value is still undefined.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
1e3baf05 135
ec3582b5
WB
# Property-string schema for a watchdog device: 'model' is the default key,
# 'action' selects what happens when the guest stops polling the watchdog.
my $watchdog_fmt = {
    model => {
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
        description => "Watchdog type to emulate.",
    },
    action => {
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
    },
};
152PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
9d66b397
SI
# Property-string schema for the guest agent settings; 'enabled' is the
# default key.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default_key => 1,
        default => 0,
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Freeze/thaw guest filesystems on backup for consistency.",
    },
    type => {
        type => 'string',
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
        description => "Select the agent type",
    },
};
181
55655ebc
DC
# Property-string schema for the display adapter; 'type' is the default key
# and 'memory' is limited to 4..512 (MiB, per its description).
my $vga_fmt = {
    type => {
        type => 'string',
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'virtio-gl', 'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        minimum => 4,
        maximum => 512,
        optional => 1,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
};
199
6dbcb073
DC
# Property-string schema for inter-VM shared memory: a mandatory 'size' and an
# optional 'name' restricted to letters, digits and '-'.
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        minimum => 1,
        description => "The size of the file in MB.",
    },
    name => {
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        format_description => 'string',
        optional => 1,
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
    },
};
214
1448547f
AL
# Property-string schema for an audio device: the emulated 'device' model is
# mandatory, the backend 'driver' is optional and defaults to 'spice'.
my $audio_fmt = {
    device => {
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
        description => "Configure an audio device.",
    },
    driver => {
        type => 'string',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
        description => "Driver backend for the audio device.",
    },
};
229
c4df18db
AL
# Property-string schema for optional SPICE extras: folder sharing (boolean)
# and the video streaming mode.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        default => '0',
        optional => 1,
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
    },
    videostreaming => {
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
        description => "Enable video streaming. Uses compression for detected video streams.",
    },
};
245
2cf61f33
SR
# Property-string schema for the VirtIO RNG device. 'source' (default key)
# picks the host entropy file; 'max_bytes' and 'period' together form the
# rate limit described in their texts.
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description => join(
            ' ',
            "The file on the host to gather entropy from. In most cases '/dev/urandom'",
            "should be preferred over '/dev/random' to avoid entropy-starvation issues on the",
            "host. Using urandom does *not* decrease security in any meaningful way, as it's",
            "still seeded from real entropy, and the bytes provided will most likely be mixed",
            "with real entropy on the guest as well. '/dev/hwrng' can be used to pass through",
            "a hardware RNG from the host.",
        ),
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
        description => join(
            ' ',
            "Maximum bytes of entropy allowed to get injected into the guest every",
            "'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use",
            "`0` to disable limiting (potentially dangerous!).",
        ),
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description => join(
            ' ',
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing",
            "the guest to retrieve another 'max_bytes' of entropy.",
        ),
    },
};
278
26b443c8
TL
# Property-string schema for guest meta information: creation time and the
# QEMU version recorded at creation, both optional.
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        minimum => 0,
        optional => 1,
        description => "The guest creation timestamp as UNIX epoch time",
    },
    'creation-qemu' => {
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
        description => "The QEMU (machine) version from the time this VM was created.",
    },
};
293
1e3baf05
DM
# Schema of the VM configuration options, keyed by option name. Each value is
# a JSON-schema style property definition (type, description, default, ...).
# The entries present here are additionally registered as standard options
# named "pve-qm-<key>" by the loop further down in this file; the numaN
# entries are added to this hash afterwards.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exits on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        # informational text only, the effective default depends on the cgroup version
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        # NOTE(review): default 0 lies below the minimum of 1 - presumably 0 stands
        # for "not set"; confirm against the code consuming this option.
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -smbios 'type=0,vendor=FOO'

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string', format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
729
cb702ebe
DL
# Property-string schema for the 'cicustom' option: each key names a volume
# holding a custom cloud-init file that replaces the generated one of that kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # The two parts used to sit inside one single-quoted literal, which put a
        # stray '"', a line break and '." ' into the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
761PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
92c02f6c 763# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
# Schema of the cloud-init related VM configuration options, keyed by option
# name. All entries are optional.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        # this schema describes VMs, so the text must not talk about containers
        description => 'cloud-init: Sets DNS search domains for the VM. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for the VM. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
820
821# what about other qemu settings ?
822#cpu => 'string',
823#machine => 'string',
824#fda => 'file',
825#fdb => 'file',
826#mtdblock => 'file',
827#sd => 'file',
828#pflash => 'file',
829#snapshot => 'bool',
830#bootp => 'file',
831##tftp => 'dir',
832##smb => 'dir',
833#kernel => 'file',
834#append => 'string',
835#initrd => 'file',
836##soundhw => 'string',
837
# Expose every option currently present in $confdesc as the standard option
# "pve-qm-<name>".
for my $opt_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $confdesc->{$opt_name});
}
841
# Upper bounds for the number of indexed devices/entries per kind.
my ($MAX_NETS, $MAX_SERIAL_PORTS, $MAX_PARALLEL_PORTS, $MAX_NUMA) = (32, 4, 3, 8);
846
ffc0d8c7
WB
# Property-string schema describing one guest NUMA node.
my $numa_fmt = {
    cpus => {
        type => "string",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        format_description => "id[-id];...",
        description => "CPUs accessing this NUMA node.",
    },
    memory => {
        type => "number",
        optional => 1,
        description => "Amount of memory this NUMA node provides.",
    },
    hostnodes => {
        type => "string",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        format_description => "id[-id];...",
        optional => 1,
        description => "Host NUMA nodes to use.",
    },
    policy => {
        type => 'string',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
        description => "NUMA allocation policy.",
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

# Option definition shared (as the very same hash reference) by all numaN
# entries and by the 'pve-qm-numanode' standard option.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    optional => 1,
    description => "NUMA topology.",
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa($MAX_NUMA - 1)
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
1e3baf05 884
f7bc17ca
TL
# NIC models accepted for the 'model' property of a netN entry.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];
# The same models, sorted and space separated.
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
1e3baf05 902
52261945
DM
# Long description of the 'bridge' property of a netN entry.
my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

 10.0.2.2 Gateway
 10.0.2.3 DNS Server
 10.0.2.4 SMB Server

The DHCP server assigns addresses to the guest starting from 10.0.2.15.
__EOD__

# Property-string format of a single 'netN' entry.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # every model name is also accepted as a key: '<model>=<mac>' aliases 'model=<model>,macaddr=<mac>'
    (map { ($_ => { keyAlias => 'model', alias => 'macaddr' }) } @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};

# Schema of the 'netN' config options; every index shares this one description.
my $netdesc = {
    optional => 1,
    type => 'string', format => $net_fmt,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
986
0c9a7596
AD
# Property-string format of a single cloud-init 'ipconfigN' entry.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
        requires => 'ip',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
        requires => 'ip6',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);

# Schema of the 'ipconfigN' config options; every index shares this one description.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option has to carry the ipconfig schema. Registering $netdesc under this
# name (as was done before) exposed the network-device schema instead.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);

# One netN and one matching cloud-init ipconfigN option per possible NIC index.
for (my $i = 0; $i < $MAX_NETS; $i++) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# All cloud-init options are regular VM config options as well.
foreach my $key (keys %$confdesc_cloudinit) {
    $confdesc->{$key} = $confdesc_cloudinit->{$key};
}
1049
83870398
DB
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the short string form of
# the resulting set. If parsing fails, returns nothing when $noerr is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    my @parsed = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    # parse_cpuset() yields (count, members); only the members are needed here
    my $members = $parsed[1];

    return PVE::CpuSet->new($members)->short_string();
}
1063
ffa42b86
DC
1064PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier for 'pve-volume-id-or-qm-path'.
#
# The keywords 'none' and 'cdrom' are accepted verbatim; anything else is handed to
# verify_volume_id_or_absolute_path() together with $noerr.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1072
1073PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned unchanged. Everything else has to pass the
# 'pve-volume-id' format check, whose result is returned. If that check fails, returns
# nothing when $noerr is set and re-throws the error otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1086
bae179aa
DA
# Schema shared by all 'serialN' options.
my $serialdesc = {
    type => 'string',
    optional => 1,
    pattern => '(/dev/.+|socket)',
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Schema shared by all 'parallelN' options.
my $paralleldesc = {
    type => 'string',
    optional => 1,
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;
1126
74c17b7a
SR
# hostpciN: one option per possible PCI passthrough slot, all sharing the PCI module's schema
$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# drive options: copy every entry of the Drive module's description hash
{
    my $drivedesc = $PVE::QemuServer::Drive::drivedesc_hash;
    $confdesc->{$_} = $drivedesc->{$_} for keys %{$drivedesc};
}

# usbN: one option per possible USB device slot, all sharing the USB module's schema
$confdesc->{"usb$_"} = $PVE::QemuServer::USB::usbdesc
    for 0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1;
1138
5cfa9f5f
SR
# Property-string format of the 'boot' option: the deprecated 'legacy' letter list
# (default key) and the explicit device 'order' list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        optional => 1,
        default_key => 1,
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            ." Deprecated, use 'order=' instead.",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        optional => 1,
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        description => <<"EODESC",
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1173
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for 'pve-qm-bootdev'.
#
# Accepts any valid drive name except efidisk*/tpmstate*, and any netN, usbN or hostpciN
# key that exists in $confdesc. Returns $dev when accepted; otherwise returns nothing if
# $noerr is set and dies if not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_drive = PVE::QemuServer::Drive::is_valid_drivename($dev);
    my $excluded = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if $is_drive && !$excluded;

    # non-drive devices need both the right name shape and a registered config option
    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1195
# Render a list of boot devices (array ref) as a 'boot' property string of the form
# 'order=dev1;dev2;...'. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    my @order = @$devs;
    return "" if !@order;

    return PVE::JSONSchema::print_property_string({ order => join(';', @order) }, $boot_fmt);
}
1202
1e3baf05
DM
# Cached result of kvm_version(); a false value means "not queried successfully yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm.
#
# The value is cached once a true value has been obtained. Returns nothing if /dev/kvm
# cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1216
1476b99f
DC
# Per-binary caches used by kvm_user_version(): the detected version string and the
# mtime the binary had when that version was read.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of a QEMU binary as reported by '<binary> --version'.
#
# $binary defaults to the command for the host architecture. The result is cached per
# binary and only re-read when the binary's mtime changes. Returns 'unknown' if the
# version line cannot be parsed or running the binary fails (the failure is warned about).
# Dies with a descriptive message if the binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() is File::stat's object-returning variant here and yields undef on failure;
    # check it so a missing binary is reported clearly rather than failing on ->mtime
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        # keep major.minor[.patch]; a fourth version component is dropped
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
4df98f2f
TL
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls back to the
# version of the installed default binary (kvm_user_version()) when $version is undefined.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1e3baf05 1251
db70021b
TL
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1e3baf05 1255
1e3baf05
DM
# True if $key is a known VM config option, i.e. has a defined entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1e3baf05 1260
# Cached result of get_cdrom_path(); undef until the first lookup.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
#
# If none is, a warning is logged and the empty string is returned. Either result is
# cached, so the lookup and the warning happen at most once.
sub get_cdrom_path {
    return $cdrom_path if defined($cdrom_path);

    for my $suffix ('', '1', '2') {
        my $candidate = "/dev/cdrom$suffix";
        next if !-l $candidate;

        $cdrom_path = $candidate;
        last;
    }

    if (!defined($cdrom_path)) {
        log_warn("no physical CD-ROM available, ignoring");
        $cdrom_path = '';
    }

    return $cdrom_path;
}
1275
# Resolve the 'file' value of a CD-ROM drive to something usable as a path.
#
#   'cdrom'        -> result of get_cdrom_path()
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> treated as a volume ID and resolved via PVE::Storage::path()
#
# $vmid is accepted but not used.
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1289
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and anything that already looks like 'storage:volume'
# are returned unchanged. Other values containing a '/' cannot be converted and yield
# nothing. A plain file name becomes 'local:iso/<file>' for cdrom media and
# 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1308
# Check that the content type of a volume ($vtype) fits the drive's media setting:
# 'disk' expects 'images', 'cdrom' expects 'iso'.
#
# Does nothing if $media is not set. Raises a parameter exception keyed by $opt on a
# mismatch and dies with "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1327
# Normalize $drive->{file} and $drive->{media} in place.
#
# A file value that is not 'cdrom'/'none', a /dev/ path, a 'storage:volume' ID or a plain
# number is treated as a filesystem path and converted to a volume ID. Failing to map it
# to a storage raises a parameter exception keyed by $opt. The media type is then
# defaulted to 'cdrom' for ISO volumes and for the 'cdrom'/'none' keywords.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my @kept_verbatim = (
        qr/^(cdrom|none)$/,
        qr|^/dev/.+|,
        qr/^([^:]+):(.+)$/,
        qr/^\d+$/,
    );

    my $file = $drive->{file};
    if (!grep { $file =~ $_ } @kept_verbatim) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;

        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1347
b3c2bdd1
DM
# Parse the value of the 'hotplug' option into a hash ref { feature => 1, ... }.
#
# '0' yields an empty hash and '1' stands for the schema default of the option. Anything
# else is split as a list of feature names. Dies on an unknown feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    my $enabled = {};
    return $enabled if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;

        $enabled->{$1} = 1;
    }

    return $enabled;
}
1366
1367PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. On a parse failure it returns
# nothing when $noerr is set and re-throws the parser's error otherwise.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() reports invalid input by dying, so it has to run inside
    # an eval for $noerr to have any effect
    my $parsed = eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value if $parsed;

    return if $noerr;

    die "unable to parse hotplug option\n";
}
1377
28ef82d3
DM
# Send a SCSI INQUIRY to an open device through the Linux SCSI generic (sg) ioctl interface.
#
# $fh    - open file handle of the device
# $noerr - if true, return nothing on failure; otherwise die with a message
#
# Returns a hash ref with the keys type, removable (0/1), vendor, product and revision
# decoded from the 36 byte inquiry response.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command block: opcode 0x12 (INQUIRY) with an allocation length of 36
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # fields in template order: interface id 'S', transfer direction -3, command and
    # sense buffer lengths, no iovec, data buffer length, the three buffers, timeout 6000
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # elements 17 and 18 are the two 'S' fields of the template (host and driver status
    # in sg.h); any non-zero value counts as failure
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # only the top bit of byte 1 flags removable media; the type is the low 5 bits of byte 0
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1427
# Open $path read/write and run a SCSI inquiry on it.
#
# Returns the inquiry result hash ref from scsi_inquiry(), or nothing if the path
# cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately: concatenating it into the file name ("+<$path") makes
    # IO::File do a two-argument open, which interprets special characters in the path
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1437
# Build the QEMU -device argument for the USB tablet.
#
# The tablet sits on the 'ehci' bus for q35 machines and for aarch64, and on 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1453
# Build the QEMU -device argument for a USB keyboard.
#
# Only aarch64 gets one; for every other architecture nothing is returned.
# $conf is accepted but not used.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1461
a183df68
TL
# Config key of a parsed drive: interface name directly followed by the index.
my sub get_drive_id {
    my ($drive) = @_;

    return join('', $drive->{interface}, $drive->{index});
}
1466
# Build the QEMU -device argument for a drive.
#
# Depending on $drive->{interface} this yields a virtio-blk-pci, scsi-* or ide-* device
# that references the block backend 'drive-<id>'. Dies for the 'usb' interface (not
# implemented) and for unknown interfaces.
#
# $vmid is accepted but not used.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                $path = $drive->{file};
                # inquiry type 0 becomes scsi-block only if 'scsiblock' is set
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # unset, lsi* and pvscsi controllers address the disk via scsi-id; every other
        # controller uses scsi-id 0 and the drive index as LUN
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is stored URL-encoded in the config
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is stored URL-encoded in the config
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1573
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first 'InitiatorName=' line, undef if the file has none, and
# nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1587
eec9f9fe
FE
# Decide whether io_uring may be used as the default aio mode for a drive on the given
# storage. Returns 1 if so and nothing otherwise; a missing storage config ($scfg)
# never vetoes.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1604
b7071d6c
FE
# Tell whether a drive ends up with a direct (O_DIRECT style) cache mode.
#
# An explicit cache setting counts as direct when it is 'off', 'none' or 'directsync'.
# Without one, the answer is true unless the drive is a CD-ROM or sits on a btrfs
# storage that does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        my $is_direct = $cache =~ /^(?:off|none|directsync)$/;
        return $is_direct;
    }

    return 0 if drive_is_cdrom($drive);

    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $btrfs_with_cow ? 0 : 1;
}
1618
# Build the QEMU -drive argument for a drive.
#
# $pbs_name  - if set, name of a PBS block node used as backing for an alloc-track
#              format layer on top of the drive; not allowed for CD-ROM drives
# $io_uring  - if true, io_uring is used as the default aio mode where the storage allows it
#
# The order of the generated options is kept stable.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);
    my $is_cdrom = drive_is_cdrom($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if ($is_cdrom) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # collected "key=value" options, emitted in this order
    my @opts;

    # options passed through to QEMU under the same name
    push @opts, map { "$_=$drive->{$_}" } grep { defined($drive->{$_}) }
        qw(heads secs cyls trans media cache rerror werror aio discard);

    # snapshot only accepts on|off
    push @opts, "snapshot=" . ($drive->{snapshot} ? 'on' : 'off') if defined($drive->{snapshot});

    # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
    push @opts, "readonly=" . ($drive->{ro} ? 'on' : 'off') if defined($drive->{ro});

    # throttling: config keys use a direction suffix, QEMU uses -total/-read/-write
    for my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;

        # [ config key, QEMU option name, whether the value is given in MiB ]
        my @limits = (
            ["mbps$dir", "throttling.bps$qmpname", 1],
            ["mbps${dir}_max", "throttling.bps$qmpname-max", 1],
            ["bps${dir}_max_length", "throttling.bps$qmpname-max-length", 0],
            ["iops${dir}", "throttling.iops$qmpname", 0],
            ["iops${dir}_max", "throttling.iops$qmpname-max", 0],
            ["iops${dir}_max_length", "throttling.iops$qmpname-max-length", 0],
        );
        for my $limit (@limits) {
            my ($key, $qemu_opt, $in_mib) = @$limit;
            my $v = $drive->{$key} or next;
            push @opts, "$qemu_opt=" . ($in_mib ? int($v*1024*1024) : $v);
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        push @opts, "format=alloc-track,file.driver=$format";
    } elsif ($format) {
        push @opts, "format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    push @opts, "cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } else {
            # aio native works only with O_DIRECT
            $aio = $cache_direct ? 'native' : 'threads';
        }
        push @opts, "aio=$aio";
    }

    if (!$is_cdrom) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        push @opts, "$dz_param=$detectzeroes" if $detectzeroes;
    }

    push @opts, "backing=$pbs_name", "auto-remove=on" if $pbs_name;

    # non-rbd drivers require the underlying file to be a seperate block
    # node, so add a second .file indirection
    my $file_param = !$pbs_name ? "file"
        : $is_rbd ? "file.filename"
        : "file.file.filename";

    my $pathinfo = $path ? "$file_param=$path," : '';
    my $opts = join('', map { ",$_" } @opts);

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1744
5921764c
SR
# Build the QEMU -blockdev argument for a read-only PBS backed block node.
#
# $pbs_conf supplies repository, snapshot and archive, plus the optional namespace and
# keyfile; $pbs_name becomes the node name.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}";
    push @props, "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1755
# Build the QEMU -device argument for a network card.
#
# $net is the parsed netN entry and $netid its config key. $machine_version gates the
# virtio options introduced with machine version 7.1, and $use_old_bios_files selects
# the legacy pxe-*.rom option ROMs.
#
# $vmid and $conf are accepted but not used.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';

    # every model except 'virtio' is used as the QEMU device name as is
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";
    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        if (min_version($machine_version, 7, 1)) {
            $tmpstr .= ",packed=on";
        }
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # '1' means: use the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # The ne2k ROM is keyed by 'ne2k_pci': the old test against 'ne2k' could never
        # match, because no NIC model of that name exists.
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k_pci' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1818
# Build the QEMU '-netdev' option string (host side backend) for network device $netid.
#
# $netid must have the form 'netN' with N below $MAX_NETS, otherwise this dies. With a bridge
# configured a tap backend named "tap${vmid}i${N}" is generated, using the pve-bridge (or
# pve-bridge-hotplug when $hotplug is set) up-script; without a bridge a 'user' backend is used.
# Returns the netdev string.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # An ID that does not look like 'netN' must be rejected here: comparing an empty index
    # numerically against $MAX_NETS would let it pass and produce an ifname without index.
    my ($i) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    $i = int($i);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    # multiqueue is only passed on for VirtIO NICs
    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1e3baf05 1856
55655ebc
DC
# Maps the 'type' of the VM's vga property to the QEMU display device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the QEMU '-device' option string for a display adapter.
#
# $vga is the parsed vga property (type and memory are read), $id the index of an additional
# display (undef for the first one) and $qxlnum is true when QXL displays are used.
# Dies if no device type can be determined, or if the libraries or the DRM render node required
# for 'virtio-gl' are missing on the host.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # Both alternatives must sit inside one group, else '^' only anchors the first and '$'
        # only the second alternative.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests that do not boot via OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1932
ffc0d8c7
WB
# Parse a ';' separated list of numbers and ranges, e.g. "0-3;5".
#
# Returns an array reference with one [ $from, $to ] pair per part, where $to is undef for a
# single number. Dies on a malformed part or on a range whose end is smaller than its start.
sub parse_number_sets {
    my ($set) = @_;

    my $ranges = [];
    for my $part (split(/;/, $set)) {
        my ($from, $to) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($to < $from)\n" if defined($to) && $to < $from;

        push @$ranges, [ $from, $to ];
    }

    return $ranges;
}
2ed5d572 1946
ffc0d8c7
WB
# Parse a numaX property string according to $numa_fmt.
#
# The 'cpus' and 'hostnodes' values, if present, are replaced by the list of ranges returned by
# parse_number_sets(). Parse errors are not caught here.
sub parse_numa {
    my ($data) = @_;

    my $numa = parse_property_string($numa_fmt, $data);

    for my $set_key (qw(cpus hostnodes)) {
        next if !defined($numa->{$set_key});
        $numa->{$set_key} = parse_number_sets($numa->{$set_key});
    }

    return $numa;
}
1955
1e3baf05
DM
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netX property string. On a parse error the error is printed as warning and nothing is
# returned. If the result has no MAC address one is generated using the 'mac_prefix' from
# datacenter.cfg, unless $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1971
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigX property string. Returns nothing (after printing a warning) if the string
# cannot be parsed or if a gateway is given without a matching static address. If neither an
# IPv4 nor an IPv6 address is configured, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $ipconfig = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the first failing consistency check determines the warning
    my $problem;
    if ($ipconfig->{gw} && !$ipconfig->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($ipconfig->{gw6} && !$ipconfig->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($ipconfig->{gw} && $ipconfig->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($ipconfig->{gw6} && $ipconfig->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $ipconfig->{ip6} address";
    }

    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ipconfig->{ip} && !$ipconfig->{ip6};

    return $ipconfig;
}
2006
# Serialize a parsed net hash back into its property string, using $net_fmt.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
2012
# Re-serialize every parsable netX entry of $settings in place. As parse_net() generates a MAC
# address for entries lacking one, all such entries have one afterwards. Entries that fail to
# parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $netid (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$netid}) or next;
        $settings->{$netid} = print_net($net);
    }
}
2023
055d554d
DM
# Returns 1 if the storage layer reports $vmid as owner of volume $volid, nothing otherwise.
# Absolute paths are never considered owned; errors from PVE::Storage::path() are ignored.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner);
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2037
055d554d
DM
# Handle a drive that got removed from the VM config:
# - a cloud-init drive is freed right away and the 'cloudinit' section of $conf is dropped
# - any other non-CD-ROM volume owned by the VM is registered as unused volume in $conf
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@; # failing to free the image is not fatal
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2052
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property string format of the 'smbios1' option. All string fields except 'uuid' share the same
# schema and differ only in their description, so they are generated from a table.
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    (
        map {
            my ($field, $description) = @$_;
            ($field => {
                type => 'string',
                pattern => '[A-Za-z0-9+\/]+={0,2}',
                format_description => 'Base64 encoded string',
                description => $description,
                optional => 1,
            });
        } (
            [ version => "Set SMBIOS1 version." ],
            [ serial => "Set SMBIOS1 serial number." ],
            [ manufacturer => "Set SMBIOS1 manufacturer." ],
            [ product => "Set SMBIOS1 product ID." ],
            [ sku => "Set SMBIOS1 SKU string." ],
            [ family => "Set SMBIOS1 family string." ],
        )
    ),
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};
2110
2796e7d5
DM
# Parse an smbios1 property string. A parse error is only printed as warning, in which case the
# returned value is undefined.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2118
cd11416f
DM
# Serialize a parsed smbios1 hash back into its property string.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $printed = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $printed;
}

# make the format available to the JSON schema under the name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2796e7d5 2125
0ea9541d
DM
# Parse a watchdog property string. Returns nothing for an empty value; a parse error is printed
# as warning and leads to an undefined result.
sub parse_watchdog {
    my ($value) = @_;

    $value or return;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2135
# Parse the 'agent' property of the VM config $conf.
#
# Returns an empty hash if the property is not set, cannot be parsed (a warning is printed) or
# if the agent is not enabled; otherwise the parsed property hash.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_str = $conf->{agent};
    return {} if !defined($agent_str);

    my $agent = eval { parse_property_string($agent_fmt, $agent_str) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2148
a2af1bbe
TL
# Look up a single key of the parsed guest agent configuration of $conf.
# Returns undef if no agent is configured, also when called in list context.
sub get_qga_key {
    my ($conf, $key) = @_;

    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2156
55655ebc
DC
# Parse a vga property string. An empty value gives an empty hash; a parse error is printed as
# warning and leads to an undefined result.
sub parse_vga {
    my ($value) = @_;

    $value or return {};

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2165
2cf61f33
SR
# Parse an rng0 property string. Returns nothing for an empty value; a parse error is printed as
# warning and leads to an undefined result.
sub parse_rng {
    my ($value) = @_;

    $value or return;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2175
26b443c8
TL
# Parse the 'meta' property string. Returns nothing for an empty value; a parse error is printed
# as warning and leads to an undefined result.
sub parse_meta_info {
    my ($value) = @_;

    $value or return;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2185
# Create the 'meta' property string for a new VM: it records the value returned by
# kvm_user_version() as 'creation-qemu' and the current time (epoch seconds) as 'ctime'.
# Takes no arguments, for now it is not allowed to override any value.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
cc181036
TL
# Returns extra QEMU command line arguments (array reference) needed to keep the virtual
# hardware stable for VMs created with an older QEMU, or nothing if no fixup applies.
#
# check if we need to apply some handling for VMs that always use the latest machine version but
# had a machine version transition happen that affected HW such that, e.g., an OS config change
# would be required (we do not want to pin machine version for non-windows OS type)
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # non-versioned machine
    return if defined($conf->{machine}) && $conf->{machine} !~ m/^(?:pc|q35|virt)$/;
    # created before 6.1
    return if defined($meta->{'creation-qemu'}) && min_version($meta->{'creation-qemu'}, 6, 1);
    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);
    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
        # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
        # and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2222
1e3baf05
DM
# add JSON properties for create and set function
#
# Copies the schema of every config option into $prop, except for options that are only used
# internally. With $with_disk_alloc set, drive options get the schema variant from
# $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead. Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my $skip_json_config_opts = {
        map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta)
    };

    for my $opt (keys %$confdesc) {
        next if $skip_json_config_opts->{$opt};

        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2248
39051ac0
FE
# Properties that we can read from an OVF file
#
# Returns a hash reference with one optional property per valid drive name plus 'cores',
# 'memory' and 'name'.
sub json_ovf_properties {
    my $prop = {
        map {
            ($_ => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $_",
                optional => 1,
            })
        } PVE::QemuServer::Drive::valid_drive_names()
    };

    my $simple_props = [
        [ cores => 'integer', "The number of CPU cores." ],
        [ memory => 'integer', "Amount of RAM for the VM in MB." ],
        [ name => 'string', "Name of the VM." ],
    ];
    for my $entry (@$simple_props) {
        my ($key, $type, $description) = @$entry;
        $prop->{$key} = {
            type => $type,
            description => $description,
            optional => 1,
        };
    }

    return $prop;
}
2280
d41121fd
DM
# return copy of $confdesc_cloudinit to generate documentation
#
# A deep copy is handed out, so callers cannot modify the original description.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2286
f16cf6c3
WB
# Returns a hash reference whose keys are the config options tracked for cloud-init: all keys
# of $confdesc_cloudinit, 'name' and every 'netN' with N below $MAX_NETS.
sub cloudinit_pending_properties {
    my $p = {
        # The parentheses end the list passed to map. Without them 'name' and 1 would be fed
        # through the map block too, resulting in a bogus '1' key.
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2295
1e3baf05
DM
# Validate $value against the type of config option $key and return the normalized value:
# - boolean: 1 or 0
# - integer: the value as integer
# - number: the value unchanged
# - string: the value unchanged if the option has a format (which is verified), otherwise the
#   value with one pair of enclosing double quotes removed
# Dies for unknown options, undefined values, values containing line breaks and type mismatches.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $desc->{type};

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2332
# Destroy VM $vmid: free the volumes it owns and remove its configuration.
#
# Unless $skiplock is set (or the VM holds the 'suspended' lock) the config lock is checked.
# For templates this dies if a base volume is still used by a linked clone. Volumes referenced
# by the config, its snapshot vmstates and its pending section are freed if owned by the VM; with
# $purge_unreferenced all images listed for the VM on any storage are freed too. Finally the
# config is replaced by $replacement_conf if that is defined, else it is destroyed.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf)
        if !$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended');

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    my $handled_volids = {}; # avoid freeing a volume twice
    my $free_if_owned = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_if_owned);

    # saved VM states of the snapshots
    for my $snap (grep { defined($_->{vmstate}) } values %{$conf->{snapshots}}) {
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $free_if_owned->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_if_owned);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2405
# Parse the raw content $raw of the VM config file $filename.
#
# Returns nothing if $raw is undefined, else a hash reference with the options of the current
# config plus the keys 'digest' (SHA1 hex of $raw), 'snapshots' (hash of snapshot name to
# options), 'pending' and 'cloudinit'. Comment lines and 'description:' lines are collected as
# the description of the section they appear in.
# Invalid lines or values are reported via warn, or via die if $strict is set. Dies if $filename
# does not end in '/qemu-server/<vmid>.conf'.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the hash of the section currently being parsed
    my $conf = $res;
    my $descr;
    # store the collected description in the current section, called on each section change
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # every other section is a snapshot
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # Must be anchored: this is tested before 'args', so an args line ending in
            # e.g. "snapstate: delete" would otherwise be taken for a snapstate line.
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # args is taken as is, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # store drives with the volume ID filename_to_volume_id() maps them to
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2525
1858638f
DM
# Generate the raw config file content for the VM config $conf and return it as string.
#
# $conf is normalized in place: legacy 'cdrom' and 'smp' options are converted, all values are
# replaced by their type checked form and 'unusedX' entries are dropped if their volume is
# referenced by a drive of the current config or its pending section. The output consists of the
# current config followed by the [PENDING] section, the [special:cloudinit] section and one
# section per snapshot.
# Dies if 'cdrom' conflicts with 'ide2', if a value fails the type check, if 'delete' appears
# outside of the pending section or if a snapshot is named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # type check all values of one section; drives of non-snapshot sections are recorded as used
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description as comment lines, followed by the sorted 'key: value' lines
    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
1e3baf05 2632
# Returns a hash reference mapping each config option that has a default to that default.
sub load_defaults {
    my $defaults = {};

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults->{$key} = $default if defined($default);
    }

    return $defaults;
}
2646
# Returns a hash reference with an entry { exists => 1 } for the ID of every guest of type
# 'qemu' that the cluster VM list assigns to the local node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    my @local_vmids = grep {
        my $d = $ids->{$_};
        $d->{node} && $d->{node} eq $nodename && $d->{type} && $d->{type} eq 'qemu';
    } keys %$ids;

    $res->{$_}->{exists} = 1 for @local_vmids;

    return $res;
}
2662
64e13401
DM
# test if VM uses local resources (to prevent migration)
#
# Dies if the VM config $conf uses local resources, unless $noerr is set. In scalar context the
# list of local resource keys is returned as array reference. In list context the keys of the
# devices using a resource mapping and a hash (node name => keys of the devices whose mapping
# has no entry for that node) are returned in addition.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record config key $key for each node where the mapping $id has no entry
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for entries using a mapping, so it needs to be checked first
            next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2721
# check if used storages are available on all nodes (used by migrate)
#
# For each volume of $conf that lives on a storage: the storage must be enabled locally and on
# $node (checked via PVE::Storage::storage_check_enabled) and must allow the content type of the
# volume, else this dies.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2745
719893a9
DM
# list nodes where all VM images are available (used by has_feature API)
#
# Returns a hash reference whose keys are the names of the remaining nodes. Starting with all
# cluster nodes, each storage backed volume of $conf narrows the set down: a disabled storage
# removes all nodes, a storage restricted to certain nodes removes all others and a non-shared
# storage removes all nodes but the local one.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_to_storage_nodes = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete $nodehash->{$_} for grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            delete $nodehash->{$_} for grep { $_ ne $nodename } keys %$nodehash;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_to_storage_nodes);

    return $nodehash
}
2779
f25852c2
TM
# Report, per cluster node, which storages used by the VM's volumes are not
# available there.
#
# Returns a hash reference keyed by node name. A node's entry has an
# 'unavailable_storages' key (sorted array of storage IDs) only if at least
# one storage is disabled or restricted to other nodes via its 'nodes' setting.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected = grep { !$avail->{$_} } keys %$nodehash;
        }

        # collect into a hash first so that each storage is listed only once
        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected;
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the per-node sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2818
# Compat only, use assert_config_exists_on_node and vm_running_locally where
# possible.
#
# Returns the result of vm_running_locally($vmid). Unless $nocheck is set, it
# first asserts that the VM config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration. In that case the config
    # file might still, or already, reside on the *other* node because:
    #  - the rename has already happened and the current node is the source
    #  - the rename hasn't happened yet and the current node is the target
    #  - the rename has happened and the current node is the target, but it
    #    hasn't processed the rename yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2832
# Return the hash from config_list(), with a 'pid' entry added for every VM
# that has a '<vmid>.pid' file in the run directory and for which
# check_running() returns a true value.
#
# If the run directory cannot be opened, the plain config list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/
            or next;

        # ignore pid files of VMs without a known config
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2850
b1a70cab
DM
# JSON schema properties describing one VM status entry. Only 'vmid' and
# 'status' are not marked optional.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => [qw(stopped running)],
        description => "QEMU process status.",
    },
    name => {
        type => 'string',
        optional => 1,
        description => "VM name.",
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => "PID of running qemu process.",
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "VM run state from the 'query-status' QMP monitor command.",
    },
    uptime => {
        type => 'integer',
        optional => 1,
        renderer => 'duration',
        description => "Uptime.",
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => "Maximum usable CPUs.",
    },
    maxmem => {
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
        description => "Root disk size in bytes.",
    },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2917
1e3baf05
DM
# CPU accounting sample (time, used ticks, last cpu value) per PID, kept
# between vmstatus() calls to derive the CPU usage from cumulative counters.
my $last_proc_pid_stat;

# Get VM status information.
#
# $opt_vmid - if set, only this VM is reported
# $full     - if true, additionally query the running VMs via QMP
#
# This must be fast and should not block when $full is false. QMP is only
# queried when $full is true (this can be slow).
#
# Returns a hash reference keyed by VMID.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $mib = 1024 * 1024;

    # the VMs this call reports on
    my @vmids = grep { !$opt_vmid || $_ eq $opt_vmid } keys %$list;

    # static information taken from the configuration
    for my $vmid (@vmids) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = $size // 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # counters that are filled in below for running VMs
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network traffic, summed up over the tap devices of each VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/
            or next;

        my $d = $res->{$vmid};
        next if !$d;

        # the device's 'receive' counter is accounted as the VM's netout and
        # 'transmit' as netin
        my ($received, $transmitted) = $netdev->{$dev}->@{qw(receive transmit)};

        $d->{netout} += $received;
        $d->{netin} += $transmitted;

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($received);
            $d->{nics}->{$dev}->{netin} = int($transmitted);
        }
    }

    my $ctime = gettimeofday();

    # uptime, memory and CPU usage from /proc/<pid>/stat
    for my $vmid (@vmids) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # too little time since the stored sample, reuse the last value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = {
            time => $ctime,
            used => $used,
            cpu => $d->{cpu},
        };
    }

    return $res if !$full;

    my @running = grep { $res->{$_}->{pid} } @vmids;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($totalrdbytes, $totalwrbytes) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $totalrdbytes += $stats->{rd_bytes};
            $totalwrbytes += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = join('.', $v->{major}, $v->{minor}, $v->{micro});
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    $qmpclient->queue_cmd($_, $statuscb, 'query-status') for @running;

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@running) {
        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if QMP did not provide one
    for my $vmid (@vmids) {
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3158
8107b378
DC
# Return 1 if any of the serial0 .. serial($MAX_SERIAL_PORTS - 1) keys of the
# VM config holds a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3170
d5535a00
TL
# Describe the audio device 'audio$id' ($id defaults to 0) of a VM config.
#
# Returns nothing if the config has no such entry. Otherwise returns a hash
# reference with the device model (dev), the backend driver (backend,
# 'spice' if none is configured) and the IDs derived from $id.
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my ($device, $driver) = $props->@{qw(device driver)};
    $driver //= 'spice';

    return {
        dev => $device,
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };
}
3188
# Build the QEMU command line arguments for an audio device.
#
# $audio           - hash with dev, dev_id, backend and backend_id keys
# $audiopciaddr    - string appended to the device definition
# $machine_version - machine version; from 4.2 on the devices are linked to
#                    the backend through an 'audiodev=' property
#
# Returns an array reference of arguments; dies for unknown device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $model = $audio->{dev};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($model eq 'AC97') {
        push @devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($model =~ /intel\-hda$/) {
        # the HDA controller itself plus its two codecs
        push @devs,
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3214
f9dde219
SR
# Return the socket and PID file paths used for the swtpm instance of $vmid
# as a hash reference with the keys 'socket' and 'pid'.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket_path,
        pid => "$socket_path.pid",
    };
}
3222
# Append the QEMU arguments for the emulated TPM to @$devices.
# Does nothing if the VM config has no 'tpmstate0' entry.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3234
# Start the swtpm daemon providing the emulated TPM of a VM.
#
# $storecfg  - storage configuration, used to map the TPM state volume
# $vmid      - ID of the VM
# $tpmdrive  - 'tpmstate0' property string; nothing is done if it is not set
# $migration - if true, swtpm_setup is skipped
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error,
# or undef if the PID file contains no number. Dies if the PID file does not
# show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    my $is_tpm2 = $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        run_command($setup_cmd, outfunc => sub {
            # use the line handed to the callback instead of relying on a
            # capture variable leaking out of run_command
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Capture in list context: a failed match yields undef instead of a stale
    # $1 left behind by an earlier, unrelated match.
    my ($pid) = (file_read_firstline($paths->{pid}) // '') =~ m/(\d+)/;
    return $pid;
}
3309
86b8228b
DM
# Check whether a 'vga' property string selects a SPICE (qxl) display.
#
# Returns 0 if the display type is not qxl, the digit for 'qxl2', 'qxl3' and
# 'qxl4', and 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};
    return 0 if !$vgatype;

    if ($vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3319
d731ecbe
WB
# Return true if $arch is the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3324
045749f2
TL
# Return the architecture configured for the VM, falling back to the one
# reported by get_host_arch() if the config does not define 'arch'.
sub get_vm_arch {
    my ($conf) = @_;

    return $conf->{arch} if defined($conf->{arch});

    return get_host_arch();
}
3329
d731ecbe
WB
# Machine type used per architecture when none is configured.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3334
0761e619
TL
# Return the leading "major.minor" part of a QEMU version string.
#
# $kvmversion - version string; kvm_user_version() is used if not defined
#
# Returns undef if the string does not start with "<digits>.<digits>".
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture in list context: a failed match yields undef instead of a stale
    # $1 left behind by an earlier, unrelated match.
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3341
# Turn an unversioned machine type into a version-pinned one.
#
# $machine      - 'pc' (or a false value), 'q35' or 'virt'; any other type
#                 is returned unchanged after a warning
# $base_version - version to pin to; the installed version is used instead if
#                 this is undefined or can_run_pve_machine_version() rejects it
# $kvmversion   - QEMU version handed to the version helpers
#
# Returns the resulting machine type string.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $can_use_base = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $can_use_base
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    # no machine type configured means 'pc'
    my $prefix = $versioned_prefix->{ $machine || 'pc' };

    if (defined($prefix)) {
        $machine = "$prefix-$pin_version";
    } else {
        warn "unknown machine type '$machine', not touching that!\n";
    }

    return $machine;
}
3363
# Determine the machine type to use for a VM.
#
# $conf            - VM configuration ('machine' and 'ostype' are read)
# $forcemachine    - overrides the configured machine type if set
# $arch            - architecture used to pick the default machine type,
#                    'x86_64' if undefined
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version; kvm_user_version() is used if undefined
#                    and the machine type is unversioned
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;

    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
            $machine .= "+pve$pvever";
        }
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/;
        if (defined($without_pxe)) {
            # the pve version goes in front of a trailing '.pxe'
            $machine = $without_pxe . '+pve0' . '.pxe';
        } else {
            $machine .= '+pve0';
        }
    }

    return $machine;
}
3398
90b20b15
DC
# Look up the OVMF code and vars images for a VM.
#
# $arch    - architecture, used as key into $OVMF
# $efidisk - parsed efidisk (may be undef); 'efitype' and 'pre-enrolled-keys'
#            are read
# $smm     - on non-aarch64 with efitype '4m', selects the '4m' images when
#            true and the '4m-no-smm' images otherwise
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture is
# unknown or one of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $wants_4m = $arch ne "aarch64"
        && defined($efidisk->{efitype})
        && $efidisk->{efitype} eq '4m';

    my $type = 'default';
    if ($wants_4m) {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*;

    if (! -f $ovmf_code) {
        die "EFI base image '$ovmf_code' not found\n";
    }
    if (! -f $ovmf_vars) {
        die "EFI vars image '$ovmf_vars' not found\n";
    }

    return ($ovmf_code, $ovmf_vars);
}
3417
6908fd9b
WB
# QEMU system emulator binary per guest architecture.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Return the binary used to run a guest of architecture $arch: '/usr/bin/kvm'
# if is_native() reports it as the host architecture, the matching emulator
# otherwise. Dies if no emulator is known for $arch.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3430
05a4c550
SR
# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
# to use in a QEMU command line (-cpu element), first array_intersect the result
# of query_supported_ with query_understood_. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# $arch defaults to the host architecture; aarch64 is rejected.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# set if kvm_version() is defined; an entry is undef if its query failed.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my @cmd = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @cmd, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %enabled;
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $cmd_result->{model}->{props};
            for my $prop (keys %$props) {
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                my $flag = $prop =~ s/\.|-/_/gr;
                $enabled{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3532
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Return the whitespace-separated flags listed in the
# 'recognized-CPUID-flags-<arch>' file of the host architecture as an array
# reference. Dies if that file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $raw = file_get_contents($filepath);

    # split(' ', ...) skips leading whitespace and splits on runs of whitespace
    return [ split(' ', $raw) ];
}
3548
e5a6919c
FE
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the BIOS is SeaBIOS (explicitly, or because none is
# configured) and the display type is 'serialN' or 'none'. Returns nothing for
# 'virt' machine types.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3559
b7d80c79
FE
# Build the two pflash drive definitions used for OVMF.
#
# $conf          - VM configuration ('efidisk0' is read)
# $storecfg      - storage configuration
# $vmid          - VM ID, used for the temporary vars file name
# $arch, $q35    - passed on to get_ovmf_files()
# $version_guard - code reference called as ->(4, 1, 2) to decide whether an
#                  explicit size is added to the vars drive
#
# Without a configured efidisk, the vars image is copied to a temporary file
# and a warning is logged.
#
# Returns the list ($code_drive_str, $var_drive_str).
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);

    my @var_opts = ("if=pflash,unit=1,id=drive-efidisk0");

    if (!$d) {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);

        push @var_opts, "format=raw", "file=$path";
        push @var_opts, "size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
    } else {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
        my ($path, $format) = $d->@{'file', 'format'};

        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $d->{file});
            if (!defined($format)) {
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                $format = qemu_img_format($scfg, $volname);
            }
        } elsif (!defined($format)) {
            die "efidisk format must be specified\n";
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        if ($path =~ m/^rbd:/) {
            push @var_opts, 'cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }

        push @var_opts, "format=$format", "file=$path";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            push @var_opts, "size=" . (-s $ovmf_vars);
        }
        push @var_opts, 'readonly=on' if drive_is_read_only($conf, $d);
    }

    my $var_drive_str = join(',', @var_opts);

    return ("if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code", $var_drive_str);
}
3599
1e3baf05 3600sub config_to_command {
5921764c
SR
3601 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3602 $pbs_backing) = @_;
1e3baf05 3603
3326ae19 3604 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
5bdcf937 3605 my $devices = [];
5bdcf937 3606 my $bridges = {};
b42d3cf9 3607 my $ostype = $conf->{ostype};
4317f69f 3608 my $winversion = windows_version($ostype);
d731ecbe 3609 my $kvm = $conf->{kvm};
38277afc 3610 my $nodename = nodename();
d731ecbe 3611
045749f2 3612 my $arch = get_vm_arch($conf);
1476b99f
DC
3613 my $kvm_binary = get_command_for_arch($arch);
3614 my $kvmver = kvm_user_version($kvm_binary);
045749f2 3615
a04dd5c4
SR
3616 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3617 $kvmver //= "undefined";
3618 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3619 }
3620
9471e48b
TL
3621 my $add_pve_version = min_version($kvmver, 4, 1);
3622
3623 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
4df98f2f 3624 my $machine_version = extract_version($machine_type, $kvmver);
d731ecbe 3625 $kvm //= 1 if is_native($arch);
4317f69f 3626
a77a53ae 3627 $machine_version =~ m/(\d+)\.(\d+)/;
ac0077cc 3628 my ($machine_major, $machine_minor) = ($1, $2);
ac0077cc 3629
b516c848
SR
3630 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3631 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3632 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
4df98f2f
TL
3633 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3634 ." please upgrade node '$nodename'\n"
b516c848 3635 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
ac0077cc 3636 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
4df98f2f
TL
3637 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3638 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3639 ." node '$nodename'\n";
ac0077cc
SR
3640 }
3641
3642 # if a specific +pve version is required for a feature, use $version_guard
3643 # instead of min_version to allow machines to be run with the minimum
3644 # required version
3645 my $required_pve_version = 0;
3646 my $version_guard = sub {
3647 my ($major, $minor, $pve) = @_;
3648 return 0 if !min_version($machine_version, $major, $minor, $pve);
47f35977
SR
3649 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3650 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
ac0077cc
SR
3651 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3652 return 1;
3653 };
a77a53ae 3654
4df98f2f
TL
3655 if ($kvm && !defined kvm_version()) {
3656 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3657 ." or enable in BIOS.\n";
d731ecbe 3658 }
bfcd9b7e 3659
3392d6ca 3660 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
4d3f29ed 3661 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
249c4a6c
AD
3662 my $use_old_bios_files = undef;
3663 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
db656e5f 3664
74fe3d9a 3665 my $cmd = [];
83870398 3666 if ($conf->{affinity}) {
74fe3d9a 3667 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
83870398
DB
3668 }
3669
1476b99f 3670 push @$cmd, $kvm_binary;
1e3baf05
DM
3671
3672 push @$cmd, '-id', $vmid;
3673
e4d4cda1
HR
3674 my $vmname = $conf->{name} || "vm$vmid";
3675
6884a7d7 3676 push @$cmd, '-name', "$vmname,debug-threads=on";
e4d4cda1 3677
27b25d03
SR
3678 push @$cmd, '-no-shutdown';
3679
1e3baf05
DM
3680 my $use_virtio = 0;
3681
d036e418 3682 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
378ad769 3683 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
c971c4f2
AD
3684 push @$cmd, '-mon', "chardev=qmp,mode=control";
3685
2ea5fb7e 3686 if (min_version($machine_version, 2, 12)) {
b4496b9e 3687 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
71bd73b5
DC
3688 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3689 }
1e3baf05 3690
d036e418 3691 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
19672434 3692
1e3baf05
DM
3693 push @$cmd, '-daemonize';
3694
2796e7d5 3695 if ($conf->{smbios1}) {
1f30ac3a
CE
3696 my $smbios_conf = parse_smbios1($conf->{smbios1});
3697 if ($smbios_conf->{base64}) {
3698 # Do not pass base64 flag to qemu
3699 delete $smbios_conf->{base64};
3700 my $smbios_string = "";
3701 foreach my $key (keys %$smbios_conf) {
3702 my $value;
3703 if ($key eq "uuid") {
3704 $value = $smbios_conf->{uuid}
3705 } else {
3706 $value = decode_base64($smbios_conf->{$key});
3707 }
3708 # qemu accepts any binary data, only commas need escaping by double comma
3709 $value =~ s/,/,,/g;
3710 $smbios_string .= "," . $key . "=" . $value if $value;
3711 }
3712 push @$cmd, '-smbios', "type=1" . $smbios_string;
3713 } else {
3714 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3715 }
2796e7d5
DM
3716 }
3717
3edb45e7 3718 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
b7d80c79
FE
3719 my ($code_drive_str, $var_drive_str) =
3720 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3721 push $cmd->@*, '-drive', $code_drive_str;
3722 push $cmd->@*, '-drive', $var_drive_str;
a783c78e
AD
3723 }
3724
483ceeab 3725 if ($q35) { # tell QEMU to load q35 config early
7583d156 3726 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
2ea5fb7e 3727 if (min_version($machine_version, 4, 0)) {
7583d156
DC
3728 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3729 } else {
3730 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3731 }
3732 }
da8b4189 3733
cc181036
TL
3734 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3735 push @$cmd, $fixups->@*;
3736 }
3737
844d8fa6
DC
3738 if ($conf->{vmgenid}) {
3739 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3740 }
3741
d40e5e18 3742 # add usb controllers
4df98f2f 3743 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
0cf8d56c 3744 $conf, $bridges, $arch, $machine_type, $machine_version);
d40e5e18 3745 push @$devices, @usbcontrollers if @usbcontrollers;
55655ebc 3746 my $vga = parse_vga($conf->{vga});
2fa3151e 3747
55655ebc
DC
3748 my $qxlnum = vga_conf_has_spice($conf->{vga});
3749 $vga->{type} = 'qxl' if $qxlnum;
2fa3151e 3750
55655ebc 3751 if (!$vga->{type}) {
869ad4a7
WB
3752 if ($arch eq 'aarch64') {
3753 $vga->{type} = 'virtio';
2ea5fb7e 3754 } elsif (min_version($machine_version, 2, 9)) {
55655ebc 3755 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3756 } else {
55655ebc 3757 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3758 }
5acbfe9e
DM
3759 }
3760
1e3baf05 3761 # enable absolute mouse coordinates (needed by vnc)
fa3b3ce0
TL
3762 my $tablet = $conf->{tablet};
3763 if (!defined($tablet)) {
5acbfe9e 3764 $tablet = $defaults->{tablet};
590e698c 3765 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
55655ebc 3766 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
5acbfe9e
DM
3767 }
3768
d559309f
WB
3769 if ($tablet) {
3770 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3771 my $kbd = print_keyboarddevice_full($conf, $arch);
3772 push @$devices, '-device', $kbd if defined($kbd);
3773 }
b467f79a 3774
e5d611c3 3775 my $bootorder = device_bootorder($conf);
2141a802 3776
74c17b7a 3777 # host pci device passthrough
9b71c34d
DC
3778 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3779 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
1e3baf05
DM
3780
3781 # usb devices
ae36393d 3782 my $usb_dev_features = {};
2ea5fb7e 3783 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
ae36393d 3784
4df98f2f 3785 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
0cf8d56c 3786 $conf, $usb_dev_features, $bootorder, $machine_version);
d40e5e18 3787 push @$devices, @usbdevices if @usbdevices;
2141a802 3788
1e3baf05 3789 # serial devices
bae179aa 3790 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
fa3b3ce0
TL
3791 my $path = $conf->{"serial$i"} or next;
3792 if ($path eq 'socket') {
3793 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3794 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
7bd9abd2 3795 # On aarch64, serial0 is the UART device. QEMU only allows
fa3b3ce0
TL
3796 # connecting UART devices via the '-serial' command line, as
3797 # the device has a fixed slot on the hardware...
3798 if ($arch eq 'aarch64' && $i == 0) {
3799 push @$devices, '-serial', "chardev:serial$i";
9f9d2fb2 3800 } else {
9f9d2fb2
DM
3801 push @$devices, '-device', "isa-serial,chardev=serial$i";
3802 }
fa3b3ce0
TL
3803 } else {
3804 die "no such serial device\n" if ! -c $path;
e35eb876 3805 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
fa3b3ce0 3806 push @$devices, '-device', "isa-serial,chardev=serial$i";
34978be3 3807 }
1e3baf05
DM
3808 }
3809
3810 # parallel devices
1989a89c 3811 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
34978be3 3812 if (my $path = $conf->{"parallel$i"}) {
19672434 3813 die "no such parallel device\n" if ! -c $path;
e35eb876 3814 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
4c5dbaf6 3815 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
5bdcf937 3816 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
34978be3 3817 }
1e3baf05
DM
3818 }
3819
b01de199 3820 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
2e7b5925 3821 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
1cc5ed1b 3822 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
b01de199 3823 push @$devices, @$audio_devs;
2e7b5925 3824 }
19672434 3825
f9dde219
SR
3826 add_tpm_device($vmid, $devices, $conf);
3827
1e3baf05
DM
3828 my $sockets = 1;
3829 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
3830 $sockets = $conf->{sockets} if $conf->{sockets};
3831
3832 my $cores = $conf->{cores} || 1;
3bd18e48 3833
de9d1e55 3834 my $maxcpus = $sockets * $cores;
76267728 3835
de9d1e55 3836 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
76267728 3837
de9d1e55
AD
3838 my $allowed_vcpus = $cpuinfo->{cpus};
3839
483ceeab 3840 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
1e3baf05 3841
483ceeab 3842 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
69c81430
AD
3843 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3844 for (my $i = 2; $i <= $vcpus; $i++) {
3845 my $cpustr = print_cpu_device($conf,$i);
3846 push @$cmd, '-device', $cpustr;
3847 }
3848
3849 } else {
3850
3851 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3852 }
1e3baf05
DM
3853 push @$cmd, '-nodefaults';
3854
dbea4415 3855 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
1e3baf05 3856
0f704640 3857 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
1e3baf05 3858
6b64503e 3859 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
1e3baf05 3860
84902837 3861 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
4df98f2f
TL
3862 push @$devices, '-device', print_vga_device(
3863 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
6f070e39
TL
3864
3865 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3866
d036e418 3867 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
378ad769 3868 push @$cmd, '-vnc', "unix:$socket,password=on";
b7be4ba9 3869 } else {
55655ebc 3870 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
b7be4ba9
AD
3871 push @$cmd, '-nographic';
3872 }
3873
1e3baf05 3874 # time drift fix
6b64503e 3875 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
8c559505 3876 my $useLocaltime = $conf->{localtime};
1e3baf05 3877
4317f69f
AD
3878 if ($winversion >= 5) { # windows
3879 $useLocaltime = 1 if !defined($conf->{localtime});
7a131888 3880
4317f69f
AD
3881 # use time drift fix when acpi is enabled
3882 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3883 $tdf = 1 if !defined($conf->{tdf});
462e8d19 3884 }
4317f69f 3885 }
462e8d19 3886
4317f69f
AD
3887 if ($winversion >= 6) {
3888 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
17bacc21 3889 push @$machineFlags, 'hpet=off';
1e3baf05
DM
3890 }
3891
8c559505
DM
3892 push @$rtcFlags, 'driftfix=slew' if $tdf;
3893
2f6f002c 3894 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
8c559505
DM
3895 push @$rtcFlags, "base=$conf->{startdate}";
3896 } elsif ($useLocaltime) {
3897 push @$rtcFlags, 'base=localtime';
3898 }
1e3baf05 3899
58c64ad5
SR
3900 if ($forcecpu) {
3901 push @$cmd, '-cpu', $forcecpu;
3902 } else {
2f6f002c 3903 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
58c64ad5 3904 }
519ed28c 3905
dafb728c
AD
3906 PVE::QemuServer::Memory::config(
3907 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
370b05e7 3908
1e3baf05
DM
3909 push @$cmd, '-S' if $conf->{freeze};
3910
b20df606 3911 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
1e3baf05 3912
48657158
MD
3913 my $guest_agent = parse_guest_agent($conf);
3914
3915 if ($guest_agent->{enabled}) {
d036e418 3916 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
378ad769 3917 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
48657158 3918
60f03a11 3919 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
48657158
MD
3920 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3921 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3922 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3923 } elsif ($guest_agent->{type} eq 'isa') {
3924 push @$devices, '-device', "isa-serial,chardev=qga0";
3925 }
ab6a046f
AD
3926 }
3927
e5d611c3
TL
3928 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3929 if ($rng && $version_guard->(4, 1, 2)) {
05853188
SR
3930 check_rng_source($rng->{source});
3931
2cf61f33
SR
3932 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3933 my $period = $rng->{period} // $rng_fmt->{period}->{default};
2cf61f33
SR
3934 my $limiter_str = "";
3935 if ($max_bytes) {
3936 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3937 }
3938
2cf61f33 3939 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
2cf61f33
SR
3940 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3941 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3942 }
3943
1d794448 3944 my $spice_port;
2fa3151e 3945
f8ea1b30 3946 if ($qxlnum || $vga->{type} =~ /^virtio/) {
590e698c 3947 if ($qxlnum > 1) {
ac087616 3948 if ($winversion){
2f6f002c 3949 for (my $i = 1; $i < $qxlnum; $i++){
4df98f2f
TL
3950 push @$devices, '-device', print_vga_device(
3951 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
590e698c
DM
3952 }
3953 } else {
3954 # assume other OS works like Linux
55655ebc
DC
3955 my ($ram, $vram) = ("134217728", "67108864");
3956 if ($vga->{memory}) {
3957 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3958 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3959 }
3960 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3961 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
2fa3151e
AD
3962 }
3963 }
3964
d559309f 3965 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
95a4b4a9 3966
af0eba7e 3967 my $pfamily = PVE::Tools::get_host_address_family($nodename);
91152441
WB
3968 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3969 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
4d316a63
AL
3970
3971 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3972 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3973 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3974
91152441
WB
3975 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3976 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
943340a6 3977
4df98f2f
TL
3978 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3979 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
caab114a
TL
3980 if ($spice_enhancement->{foldersharing}) {
3981 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3982 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3983 }
c4df18db 3984
caab114a 3985 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
4df98f2f
TL
3986 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3987 if $spice_enhancement->{videostreaming};
3988
caab114a 3989 push @$devices, '-spice', "$spice_opts";
1011b570
DM
3990 }
3991
8d9ae0d2
DM
3992 # enable balloon by default, unless explicitly disabled
3993 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3326ae19 3994 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
c70e4ec3
AD
3995 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3996 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3997 push @$devices, '-device', $ballooncmd;
8d9ae0d2 3998 }
1e3baf05 3999
0ea9541d
DM
4000 if ($conf->{watchdog}) {
4001 my $wdopts = parse_watchdog($conf->{watchdog});
3326ae19 4002 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
0a40e8ea 4003 my $watchdog = $wdopts->{model} || 'i6300esb';
5bdcf937
AD
4004 push @$devices, '-device', "$watchdog$pciaddr";
4005 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
0ea9541d
DM
4006 }
4007
1e3baf05 4008 my $vollist = [];
941e0c42 4009 my $scsicontroller = {};
26ee04b6 4010 my $ahcicontroller = {};
cdd20088 4011 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
1e3baf05 4012
5881b913
DM
4013 # Add iscsi initiator name if available
4014 if (my $initiator = get_initiator_name()) {
4015 push @$devices, '-iscsi', "initiator-name=$initiator";
4016 }
4017
912792e2 4018 PVE::QemuConfig->foreach_volume($conf, sub {
1e3baf05
DM
4019 my ($ds, $drive) = @_;
4020
ff1a2432 4021 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3f11f0d7 4022 check_volume_storage_type($storecfg, $drive->{file});
1e3baf05 4023 push @$vollist, $drive->{file};
ff1a2432 4024 }
afdb31d5 4025
4dcce9ee
TL
4026 # ignore efidisk here, already added in bios/fw handling code above
4027 return if $drive->{interface} eq 'efidisk';
f9dde219
SR
4028 # similar for TPM
4029 return if $drive->{interface} eq 'tpmstate';
4dcce9ee 4030
1e3baf05 4031 $use_virtio = 1 if $ds =~ m/^virtio/;
3b408e82 4032
2141a802 4033 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3b408e82 4034
2f6f002c 4035 if ($drive->{interface} eq 'virtio'){
51f492cd
AD
4036 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4037 }
4038
2f6f002c 4039 if ($drive->{interface} eq 'scsi') {
cdd20088 4040
ee034f5c 4041 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
6731a4cf 4042
b8fb1c03
SR
4043 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4044 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4045
3326ae19 4046 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
a1b7d579 4047 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
fc8b40fd
AD
4048
4049 my $iothread = '';
4050 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4051 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4052 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
e7a5104d 4053 } elsif ($drive->{iothread}) {
d80ad18c
MH
4054 log_warn(
4055 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4056 );
fc8b40fd
AD
4057 }
4058
6e11f143
AD
4059 my $queues = '';
4060 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4061 $queues = ",num_queues=$drive->{queues}";
370b05e7 4062 }
6e11f143 4063
4df98f2f
TL
4064 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4065 if !$scsicontroller->{$controller};
cdd20088 4066 $scsicontroller->{$controller}=1;
2f6f002c 4067 }
3b408e82 4068
26ee04b6 4069 if ($drive->{interface} eq 'sata') {
2f6f002c 4070 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3326ae19 4071 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4df98f2f
TL
4072 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4073 if !$ahcicontroller->{$controller};
2f6f002c 4074 $ahcicontroller->{$controller}=1;
26ee04b6 4075 }
46f58b5f 4076
5921764c
SR
4077 my $pbs_conf = $pbs_backing->{$ds};
4078 my $pbs_name = undef;
4079 if ($pbs_conf) {
4080 $pbs_name = "drive-$ds-pbs";
4081 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4082 }
4083
6d5673c3
SR
4084 my $drive_cmd = print_drive_commandline_full(
4085 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3dc33a72
FG
4086
4087 # extra protection for templates, but SATA and IDE don't support it..
75748d44 4088 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4ef13a7f 4089
15b21acc 4090 push @$devices, '-drive',$drive_cmd;
4df98f2f
TL
4091 push @$devices, '-device', print_drivedevice_full(
4092 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
1e3baf05
DM
4093 });
4094
cc4d6182 4095 for (my $i = 0; $i < $MAX_NETS; $i++) {
2141a802
SR
4096 my $netname = "net$i";
4097
4098 next if !$conf->{$netname};
4099 my $d = parse_net($conf->{$netname});
d0a86b24 4100 next if !$d;
4ddd2ca2 4101 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
1e3baf05 4102
d0a86b24 4103 $use_virtio = 1 if $d->{model} eq 'virtio';
1e3baf05 4104
2141a802 4105 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
1e3baf05 4106
2141a802 4107 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
d0a86b24 4108 push @$devices, '-netdev', $netdevfull;
5bdcf937 4109
d0a86b24 4110 my $netdevicefull = print_netdevice_full(
0c03a390 4111 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4df98f2f 4112
d0a86b24 4113 push @$devices, '-device', $netdevicefull;
5bdcf937 4114 }
1e3baf05 4115
6dbcb073 4116 if ($conf->{ivshmem}) {
4df98f2f 4117 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
e3c27a6a 4118
6dbcb073
DC
4119 my $bus;
4120 if ($q35) {
4121 $bus = print_pcie_addr("ivshmem");
4122 } else {
4123 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4124 }
e3c27a6a
TL
4125
4126 my $ivshmem_name = $ivshmem->{name} // $vmid;
4127 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4128
6dbcb073 4129 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4df98f2f
TL
4130 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4131 .",size=$ivshmem->{size}M";
6dbcb073
DC
4132 }
4133
2513b862
DC
4134 # pci.4 is nested in pci.1
4135 $bridges->{1} = 1 if $bridges->{4};
4136
3326ae19
TL
4137 if (!$q35) { # add pci bridges
4138 if (min_version($machine_version, 2, 3)) {
fc79e813
AD
4139 $bridges->{1} = 1;
4140 $bridges->{2} = 1;
4141 }
6731a4cf 4142 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
2513b862
DC
4143 }
4144
4145 for my $k (sort {$b cmp $a} keys %$bridges) {
4146 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
13d68979
SR
4147
4148 my $k_name = $k;
4149 if ($k == 2 && $legacy_igd) {
4150 $k_name = "$k-igd";
4151 }
3326ae19 4152 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
2513b862 4153 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3326ae19
TL
4154
4155 if ($q35) { # add after -readconfig pve-q35.cfg
2513b862
DC
4156 splice @$devices, 2, 0, '-device', $devstr;
4157 } else {
4158 unshift @$devices, '-device', $devstr if $k > 0;
f8e83f05 4159 }
19672434
DM
4160 }
4161
ac0077cc
SR
4162 if (!$kvm) {
4163 push @$machineFlags, 'accel=tcg';
4164 }
4165
e4263214 4166 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
e5a6919c 4167
ac0077cc
SR
4168 my $machine_type_min = $machine_type;
4169 if ($add_pve_version) {
4170 $machine_type_min =~ s/\+pve\d+$//;
4171 $machine_type_min .= "+pve$required_pve_version";
4172 }
4173 push @$machineFlags, "type=${machine_type_min}";
4174
5bdcf937 4175 push @$cmd, @$devices;
2f6f002c
TL
4176 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4177 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4178 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
8c559505 4179
7ceade4c
DC
4180 if (my $vmstate = $conf->{vmstate}) {
4181 my $statepath = PVE::Storage::path($storecfg, $vmstate);
24d1f93a 4182 push @$vollist, $vmstate;
7ceade4c 4183 push @$cmd, '-loadstate', $statepath;
b85666cf 4184 print "activating and using '$vmstate' as vmstate\n";
7ceade4c
DC
4185 }
4186
85fcf79e
FG
4187 if (PVE::QemuConfig->is_template($conf)) {
4188 # needed to workaround base volumes being read-only
4189 push @$cmd, '-snapshot';
4190 }
4191
76350670
DC
4192 # add custom args
4193 if ($conf->{args}) {
4194 my $aa = PVE::Tools::split_args($conf->{args});
4195 push @$cmd, @$aa;
4196 }
4197
9b71c34d 4198 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
1e3baf05 4199}
19672434 4200
05853188
SR
# Sanity-check the host file that is going to back a VirtIO RNG device.
#
# $source - path of the entropy source on the host (e.g. '/dev/hwrng')
#
# Dies if the path does not exist, or if '/dev/hwrng' is requested while the
# kernel reports 'none' as the currently selected hardware RNG. Returns nothing.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n"
        if ! -e $source;

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    if ($source eq '/dev/hwrng') {
        # NOTE(review): file_read_firstline() presumably yields undef when the sysfs
        # file is missing or empty - treat that like "not 'none'" instead of tripping
        # an 'uninitialized value' warning in the string comparison below.
        my $current = file_read_firstline($rng_current) // '';

        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n"
            if $current eq 'none';
    }

    return;
}
4217
# Ask the running VM $vmid (via the 'query-spice' monitor command) for the port
# its SPICE server uses. The TLS port is preferred over the plain port.
# Dies with "no spice port" if neither is reported.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $spice_info->{$key} if $spice_info->{$key};
    }

    die "no spice port\n";
}
4225
86fdcfb2
DA
# Collect the IDs of the devices currently present in the running VM $vmid.
#
# Returns a hash reference keyed by device ID, filled from four monitor queries:
#  - 'query-pci':   every PCI device with a 'qdev_id' gets the value 1; for a PCI
#                   bridge the number of devices behind it is added on top
#  - 'query-block': block devices named 'drive-<id>' are recorded as <id>
#  - 'query-mice':  'tablet' is set if a 'QEMU HID Tablet' is listed
#  - 'qom-list':    peripherals named usbN / usbredirdevN
sub vm_devices_list {
    my ($vmid) = @_;

    my $found = {};

    # start with the devices sitting directly on the reported PCI buses ...
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my $level = [];
    push @$level, @{$_->{devices}} for @$pci_buses;

    # ... and descend one bridge level per iteration until nothing is left
    while (@$level) {
        my $children = [];
        for my $dev (@$level) {
            $found->{$dev->{'qdev_id'}} = 1 if $dev->{'qdev_id'};

            my $bridge = $dev->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            $found->{$dev->{'qdev_id'}} += scalar(@{$bridge->{devices}});
            push @$children, @{$bridge->{devices}};
        }
        $level = $children;
    }

    my $blocks = mon_cmd($vmid, 'query-block');
    for my $blk (@$blocks) {
        $found->{$1} = 1 if $blk->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    $found->{tablet} = 1 if first { $_->{name} eq 'QEMU HID Tablet' } @$mice;

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $entry (@$peripherals) {
        $found->{$entry->{name}} = 1 if $entry->{name} =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $found;
}
4275
# Hot-plug the device $deviceid into the running VM $vmid.
#
# $storecfg     - storage configuration, handed to the drive helpers
# $conf         - VM configuration hash
# $deviceid     - ID of the device to add (tablet, keyboard, usbN, usbredirdevN,
#                 virtioN, virtioscsiN/scsihwN, scsiN, netN or - non-q35 only - pci.N)
# $device       - parsed configuration of that device, where one is needed
# $arch, $machine_type - handed through to the device string helpers
#
# Returns 1 once the device is present (also when it already was), returns
# nothing when qemu_netdevadd() reports failure, and dies on other errors or
# when the device ID is of a kind that cannot be hot-plugged.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # nothing to do if the VM already lists this device
    my $plugged = vm_devices_list($vmid);
    return 1 if defined($plugged->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $index = $1;
        # the redirection device needs its chardev to exist first
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$index");
        qemu_deviceadd(
            $vmid, PVE::QemuServer::USB::print_spice_usbdevice($index, "xhci", $index + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        my $port = $1 + 1;
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $port),
        );
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        qemu_iothread_add($vmid, $deviceid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_dev = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        qemu_deviceadd($vmid, $drive_dev);

        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            # undo the drive add again, but propagate the original error
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        my $hw_model = $conf->{scsihw} // "lsi";
        my $pci_addr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $driver = $hw_model eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $hw_model;

        my $controller_dev = "$driver,id=$deviceid$pci_addr";

        # iothread and queue settings are only applied to virtioscsiN controllers
        my $is_virtioscsi = $deviceid =~ m/^virtioscsi\d+$/;
        if ($is_virtioscsi && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $controller_dev .= ",iothread=iothread-$deviceid";
        }
        $controller_dev .= ",num_queues=$device->{queues}"
            if $is_virtioscsi && $device->{queues};

        qemu_deviceadd($vmid, $controller_dev);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_dev = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        eval { qemu_deviceadd($vmid, $drive_dev); };
        if (my $err = $@) {
            # undo the drive add again, but propagate the original error
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid) or return;

        # the version is taken from the machine type as returned by qemu_machine_pxe(),
        # i.e. before qemu_use_old_bios_files() gets a chance to replace it
        my $pxe_machine = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($pxe_machine);
        (my $use_old_bios_files, $pxe_machine) = qemu_use_old_bios_files($pxe_machine);

        my $net_dev = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $pxe_machine,
            $machine_version,
        );
        qemu_deviceadd($vmid, $net_dev);

        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # undo the netdev add again, but propagate the original error
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$is_q35 && $deviceid =~ m/^pci\.(\d+)$/) {
        my $bridge_nr = $1;
        my $pci_addr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, "pci-bridge,id=pci.$bridge_nr,chassis_nr=$bridge_nr$pci_addr");
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4372
3eec5767 4373# fixme: this should raise exceptions on error!
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# $conf is the VM configuration; it is consulted for the boot disks, for the
# drive definition of virtioN/scsiN devices and for the 'scsihw' setting.
#
# Returns 1 once the device is gone (also when it was not present to begin
# with). Dies if the device is a boot disk or of a kind that cannot be unplugged.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    # nothing to do if the VM does not list this device
    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep { $_ eq $deviceid } @$bootdisks;

    if (grep { $deviceid eq $_ } qw(tablet keyboard xhci)) {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(?:usb(?:redirdev)?|virtioscsi|scsihw)\d+$/) {
        # plain devices: delete and wait for the removal, nothing else to clean up
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        if (($conf->{scsihw} // '') eq 'virtio-scsi-single') {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
c60cad61
DC
# Issue a "chardev-add" monitor command for VM $vmid that creates a character
# device with ID $id, using a 'spicevmc' backend of type "usbredir".
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4435
5e5dcb73
DA
# Issue a "device_add" monitor command for VM $vmid.
#
# $devicefull is a "driver,key=value,..." device string; it is prefixed with
# "driver=" and then split on '=' and ',' into the argument hash, so neither
# keys nor values may themselves contain '=' or ','.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %device_args = split(/[=,]/, "driver=$devicefull");

    mon_cmd($vmid, "device_add", %device_args);
}
afdb31d5 4444
# Issue a "device_del" monitor command for device $deviceid of VM $vmid and
# hand back whatever the monitor call returned.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);
    return $result;
}
4450
# Create the iothread object "iothread-$deviceid" in VM $vmid, but only if
# $device has the 'iothread' flag set and the object is not already listed
# by vm_iothreads_list().
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4459
# Remove the iothread object "iothread-$deviceid" from VM $vmid, but only if
# $device has the 'iothread' flag set and the object is currently listed by
# vm_iothreads_list().
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4468
# Issue an "object-add" monitor command creating object $objectid of QOM
# type $qomtype in VM $vmid. Returns 1 (errors surface from mon_cmd).
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %object_args = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %object_args);

    return 1;
}
4476
# Issue an "object-del" monitor command removing object $objectid from VM
# $vmid. Returns 1 (errors surface from mon_cmd).
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %object_args = (id => $objectid);
    mon_cmd($vmid, "object-del", %object_args);

    return 1;
}
4484
# Add the drive described by $device to the running VM $vmid through the
# human monitor command "drive_add auto".
#
# The drive string is built with print_drive_commandline_full(); the last
# argument passed to it is true when the running QEMU is at least 6.0.
# Returns 1 on success, dies with the monitor reply otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # backslashes have to be doubled inside the quoted HMP argument
    $drive_spec =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
afdb31d5 4499
# Remove the block backend "drive-$deviceid" from VM $vmid through the human
# monitor command "drive_del".
#
# Returns 1 when the monitor reply is empty or reports that the device was
# not found; dies with the reply text for any other output.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $removed = $reply eq "" || $reply =~ m/Device \'.*?\' not found/s;

    die "deleting drive $deviceid failed : $reply\n" if !$removed;

    return 1;
}
f19d1c47 4513
# Wait until device $deviceid shows up in the device list of VM $vmid.
#
# The list is polled up to six times with a one second sleep after each
# unsuccessful check. Returns 1 once the device is listed, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
afdb31d5 4525
5e5dcb73
DA
4526
# Wait until device $deviceid has disappeared from the device list of VM $vmid.
#
# Needed because device_del is asynchronous and its empty return value does
# not tell whether the device is really gone. The list is polled up to six
# times with a one second sleep after each unsuccessful check. Returns 1 once
# the device is no longer listed, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4541
# Make sure the SCSI controller needed by drive $device exists in VM $vmid.
#
# The controller ID is the prefix and controller number reported by
# scsihw_infos(); if no device with that ID is listed yet, it is hot-plugged
# through vm_deviceplug(). Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$scsihwid});

    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);

    return 1;
}
4556
8ce30dde
AD
# Unplug the SCSI controller that served drive $opt of VM $vmid, if it is no
# longer needed.
#
# With 'virtio-scsi-single' every disk has its own controller
# "virtioscsi<index>", which is unplugged unconditionally. Otherwise the
# controller "scsihw<N>" is only unplugged when no plugged SCSI drive has an
# index below (maxdev - 1) * (controller + 1). Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $scsihw = $conf->{scsihw};
    if ($scsihw && $scsihw eq 'virtio-scsi-single') {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    # keep the controller while any plugged SCSI drive still falls below the limit
    my $present = vm_devices_list($vmid);
    for my $present_id (keys %{$present}) {
        next if !is_valid_drivename($present_id);

        my $drive = parse_drive($present_id, $conf->{$present_id});
        next if $drive->{interface} ne 'scsi';

        return 1 if $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4585
# Hot-plug the PCI bridge required by device ID $device, if there is one and
# it is not plugged yet.
#
# print_pci_addr() is asked to record the bridges needed for $device in a
# scratch hash; a key picked from that hash is used as the bridge number.
# Nothing is done when no bridge is recorded or its number is below 1.
# Otherwise "pci.<N>" is plugged through vm_deviceplug() unless it is already
# listed by vm_devices_list(). Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    my $bridgeid;
    for my $recorded (keys %$bridges) {
        $bridgeid = $recorded;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device,
        # $arch, $machine_type); a bridge has no device config, so pass an
        # empty one to keep $arch and $machine_type in their proper slots.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4609
25088687
DM
# Issue a "set_link" monitor command for network device $device of VM $vmid;
# a true $up is sent as JSON true, anything else as JSON false.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4616
# Issue a "netdev_add" monitor command for network device $deviceid of VM $vmid.
#
# The option string comes from print_netdev_full() and is split on '=' and
# ',' into the argument hash. 'vhost' is converted to a JSON boolean and
# 'queues' to a number before sending. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev_spec = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %netdev_args = split(/[=,]/, $netdev_spec);

    # the values are plain strings after split(), convert the typed ones
    if (defined($netdev_args{vhost})) {
        my $enabled = PVE::JSONSchema::parse_boolean($netdev_args{vhost});
        $netdev_args{vhost} = JSON::boolean($enabled);
    }

    if (defined($netdev_args{queues})) {
        $netdev_args{queues} = 0 + $netdev_args{queues};
    }

    mon_cmd($vmid, "netdev_add", %netdev_args);

    return 1;
}
4634
# Issue a "netdev_del" monitor command removing the network backend with ID
# $deviceid from VM $vmid.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %netdev_args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", %netdev_args);
}
4640
# Replace USB device $deviceid of VM $vmid with the one described by $device.
#
# Does nothing when $device is false. Otherwise any existing device with that
# ID is unplugged, an xhci controller is added if none is listed, and the new
# device is plugged through vm_deviceplug().
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $present = vm_devices_list($vmid);
    my $has_xhci = $present->{xhci};

    if (!$has_xhci) {
        my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_addr));
    }

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4660
# Bring the number of plugged vCPUs of the running VM $vmid to $vcpus.
#
# The maximum is sockets * cores from $conf (the legacy 'smp' option is used
# for sockets when 'sockets' is unset); a false $vcpus means "maximum".
# Dies when more than the maximum is requested, when unplugging is requested
# on a machine version below 2.7, or when the running vCPU count does not
# match the configuration before plugging. Raises a parameter exception for
# 'vcpus' when a single plug/unplug step does not show up in time.
# $conf->{vcpus} is updated and written out after every successful step.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style option - no longer used, 'sockets' wins if set
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # unplug from the highest cpu index downwards, one at a time
        for my $cpu_index (reverse($vcpus + 1 .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$cpu_index");

            my $retry = 0;
            my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            while (scalar(@{$running_cpus}) != $cpu_index - 1) {
                raise_param_exc({ vcpus => "error unplugging cpu$cpu_index" }) if $retry > 5;
                $retry++;
                sleep 1;
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # plug one cpu device at a time and wait for it to show up
        for my $cpu_index ($currentvcpus + 1 .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $cpu_index);
            qemu_deviceadd($vmid, $cpustr);

            my $retry = 0;
            my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            while (scalar(@{$running_cpus}) != $cpu_index) {
                raise_param_exc({ vcpus => "error hotplugging cpu$cpu_index" }) if $retry > 10;
                sleep 1;
                $retry++;
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            }

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # machine versions below 2.7 use the "cpu-add" command with a zero-based id
        for my $cpu_id ($currentvcpus .. $vcpus - 1) {
            mon_cmd($vmid, "cpu-add", id => int($cpu_id));
        }
    }
}
4735
# Issue a "block_set_io_throttle" monitor command for block device $deviceid
# of VM $vmid; does nothing when the VM is not running.
#
# After $vmid and $deviceid the limits are expected positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every value is passed through int() before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor argument names, in the same order as the positional limits
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4767
c1175c92
AD
# Resize volume $volid to $size bytes and, if VM $vmid is running, tell QEMU
# about the new size of block device $deviceid.
#
# The storage layer is always asked to resize first. For a running VM the
# size sent with "block_resize" is rounded up to the next multiple of 1024
# and the command is issued with a timeout of 60.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # round up to a multiple of 1024 bytes
    my $aligned_size = $size + (1024 - $size % 1024) % 1024;

    my %resize_args = (
        device => $deviceid,
        size => int($aligned_size),
        timeout => 60,
    );
    mon_cmd($vmid, "block_resize", %resize_args);
}
4788
1ab0057c
AD
# Create snapshot $snap of volume $volid.
#
# When VM $vmid is running and do_snapshots_with_qemu() says so for this
# volume, the snapshot is taken by QEMU ('blockdev-snapshot-internal-sync' on
# $deviceid); in every other case the storage layer takes it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4800
fc46aff9
AD
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "in use by a running VM" when VM $vmid is running
# and one of the volumes in its current configuration is $volid. If so, and
# do_snapshots_with_qemu() agrees, QEMU deletes the snapshot
# ('blockdev-snapshot-delete-internal-sync' on $deviceid); otherwise the
# storage layer does, and is told whether the volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running alone is not enough, the volume must be part of the current config
        $in_use = undef;

        my $current_conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_used = sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($current_conf, $mark_if_used);
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4822
# Set the migration capabilities of VM $vmid.
#
# Every capability reported by "query-migrate-capabilities" is sent back with
# an explicit state: "auto-converge" and "xbzrle" are enabled, "dirty-bitmaps"
# is enabled only if "query-proxmox-support" reports the matching property
# ('pbs-dirty-bitmap-savevm' when $savevm is true, otherwise
# 'pbs-dirty-bitmap-migration'), and everything else is disabled. A failing
# "query-proxmox-support" is treated as "not supported".
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    # desired state per capability; capabilities not listed here get disabled
    my %wanted = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted{$name} ? JSON::true : JSON::false,
        };
    } @$supported;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
a89fded1 4853
# Call $func->($volid, $info, @param) once for every volume referenced by
# $conf, its pending section or any of its snapshots.
#
# Volumes are collected with PVE::QemuConfig->foreach_volume_full(), including
# unused volumes and the 'vmstate' key. $info is a hash accumulated over all
# references to the same volume ID:
#   cdrom                  - 1 only if every reference is a cdrom drive
#   replicate              - 1 if any reference has replication enabled (default on)
#   shared                 - 1 if any reference is marked shared
#   is_unused              - 1 if referenced by an unusedN key
#   is_attached            - 1 if referenced by the current config (not a
#                            snapshot, not pending) while not flagged unused
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   referenced_in_pending  - 1 if referenced in the pending section
#   size                   - first non-zero size seen
#   is_vmstate             - 1 if referenced by the 'vmstate' key
#   is_tpmstate            - 1 if referenced by the 'tpmstate0' key
#   drivename              - last valid drive name referencing the volume
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my %info_of;

    my $collect = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($info_of{$volid} //= {});

        # starts out as cdrom, a single non-cdrom reference clears the flag
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        $info->{replicate} //= 0;
        $info->{replicate} = 1 if ($drive->{replicate} // 1);

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{is_attached} //= 0;
        if (!$info->{is_unused} && !defined($snapname) && !$pending) {
            $info->{is_attached} = 1;
        }

        $info->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);

        $info->{referenced_in_pending} = 1 if $pending;

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);

    # pending changes, only if there are any
    if (defined($conf->{pending}) && %{$conf->{pending}}) {
        PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $collect, undef, 1);
    }

    # every snapshot
    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %info_of) {
        $func->($volid, $info_of{$volid}, @param);
    }
}
4918
# Options that vmconfig_hotplug_pending() moves from the pending section
# straight into the configuration, without any hotplug action on the VM.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    ),
};

# every option described in $confdesc_cloudinit is fast-pluggable as well
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4937
3a11fadb
DM
# hotplug changes in [PENDING]
#
# Parameters:
#   $vmid      - ID of the running VM
#   $conf      - VM configuration; modified in place and written back
#   $storecfg  - storage configuration
#   $selection - optional hash to only apply specified options, for
#                example: { cores => 1 } (only apply changed 'cores')
#   $errors    - hash ref used to return error messages, keyed by option
#
# Processing order: fast-plug options are committed first, then pending
# deletions, then pending additions/changes, then a pending cloud-init drive
# (so it is generated after all other changes), and finally the xhci
# controller is unplugged if no usb device is left. Options that cannot be
# hot-plugged are skipped and stay pending.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();

    # Keeps the 'cloudinit' section of $conf up to date when option $opt
    # changes from $old to $new: the section remembers the previous value of
    # changed options and, in its 'added' list, the options that were added.
    my $cloudinit_record_changed = sub {
        my ($conf, $opt, $old, $new) = @_;
        return if !$cloudinit_pending_properties->{$opt};

        my $ci = ($conf->{cloudinit} //= {});

        my $recorded = $ci->{$opt};
        my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');

        if (defined($new)) {
            if (defined($old)) {
                # an existing value is being modified
                if (defined($recorded)) {
                    # the value was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value was changed multiple times, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added and is being changed, do nothing
                } else {
                    # the value is new, record it:
                    $ci->{$opt} = $old;
                }
            } else {
                # a new value is being added
                if (defined($recorded)) {
                    # it was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value had temporarily been removed, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added already, do nothing
                } else {
                    # the value is new, add it
                    $added{$opt} = 1;
                }
            }
        } elsif (!defined($old)) {
            # a non-existent value is being removed? ignore...
        } else {
            # a value is being deleted
            if (defined($recorded)) {
                # a value was already recorded, just keep it
            } elsif ($added{$opt}) {
                # the value was marked as added, remove it
                delete $added{$opt};
            } else {
                # a previously unrecorded value is being removed, record the old value:
                $ci->{$opt} = $old;
            }
        }

        my $added = join(',', sort keys %added);
        $ci->{added} = $added if length($added);
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            my $new = delete $conf->{pending}->{$opt};
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
            $conf->{$opt} = $new;
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $ostype = $conf->{ostype};
    my $version = extract_version($machine_type, get_running_qemu_version($vmid));
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $usb_hotplug = $hotplug_features->{usb}
        && min_version($version, 7, 1)
        && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});

    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # vm_deviceplug() expects a device config before $arch and
                    # $machine_type; tablet/keyboard have none, so pass an empty one
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            my $old = delete $conf->{$opt};
            $cloudinit_record_changed->($conf, $opt, $old, undef);
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    my $cloudinit_opt;
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see above: empty device config keeps the argument slots aligned
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                my $d = eval { parse_property_string('pve-qm-usb', $value) };
                my $id = $opt;
                if ($d->{host} =~ m/^spice$/i) {
                    $id = "usbredirdev$index";
                }
                qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $cloudinit_opt = [$opt, $drive];
                    # apply all the other changes first, then generate the cloudinit disk
                    die "skip\n";
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    if (defined($cloudinit_opt)) {
        my ($opt, $drive) = @$cloudinit_opt;
        my $value = $conf->{pending}->{$opt};
        eval {
            # generate the cloud-init config from a copy that already contains the new drive
            my $temp = {%$conf, $opt => $value};
            PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
            vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                $vmid, $opt, $value, $arch, $machine_type);
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # unplug xhci controller if no usb device is left
    if ($usb_hotplug) {
        my $has_usb = 0;
        for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
            next if !defined($conf->{"usb$i"});
            $has_usb = 1;
            last;
        }
        if (!$has_usb) {
            vm_deviceunplug($vmid, $conf, 'xhci');
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
    }
}
055d554d 5222
3dc38fbb
WB
# Free the volume behind drive $drive (config key $key) if that is allowed.
#
# Only acts when $force is set or $key is an unused entry, and the drive is
# not a cdrom. Returns 1 when the entry can be dropped from the config: either
# the volume was freed, or the VM is not the owner of the volume. Returns
# nothing when no deallocation was attempted.
# Dies when $authuser lacks 'Datastore.AllocateSpace' on the storage (as
# checked by $rpcenv) or when the volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $still_used = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $still_used;

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
5245
# Handle the removal of drive option $opt from the configuration of VM $vmid.
#
# Without $force the drive is registered as unused. With $force the current
# user must hold 'VM.Config.Disk' on the VM and the volume is deallocated
# through try_deallocate_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        # detach only: keep the volume around as an unused disk
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
5261
98bc3aeb
OB
5262
5263
# Apply the pending section of a VM config directly to the config ("cold plug").
#
# Pending deletions are handled first, in sorted option order, followed by pending additions
# and changes. A failure while handling one option is stored in $errors->{$opt} and emitted as
# a warning; that option is then not applied, and processing continues with the next one.
# The config is written once after all options were handled.
#
# If one of the applied drive changes is a cloud-init drive and $skip_cloud_init is not set,
# apply_cloudinit_config() is called afterwards, and the config is written again if it returns
# a true value.
#
# Returns early, without touching anything, if there are no pending entries.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys $conf->{pending}->%*);

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys $to_delete->%*) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            # 'unused*' entries are never expected in the pending delete list
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # starting at 0 (instead of undef) keeps the '//=' below from ever enabling generation
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    for my $opt (keys $conf->{pending}->%*) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced is registered as unused first
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_register_unused_drive(
                    $storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            }
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $pending_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($pending_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        # After successful generation and if there were changes to be applied, update the
        # config to drop the {cloudinit} entry.
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5330
# Apply a changed network device option ($opt, e.g. "net0") with new value $value to a VM.
#
# If the device already exists in $conf and only bridge, tag, trunks, firewall, rate or
# link_down differ, the existing tap interface is adjusted in place and 1 is returned.
#
# If model, macaddr, queues or mtu differ, or either the old or the new setting has no
# bridge, the device is unplugged and plugged again with the new settings. A device that does
# not exist in $conf yet is just plugged. Both cases die with "skip\n" when $hotplug is not
# set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $bridge_settings_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($bridge_settings_changed) {
                PVE::Network::tap_unplug($iface);

                my $plug = $have_sdn
                    ? \&PVE::Network::SDN::Zones::tap_plug
                    : \&PVE::Network::tap_plug;
                $plug->(
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5384
74ea2c65
AD
# Decide whether a changed guest agent option can be accepted without further action.
#
# Dies with "skip\n" if $conf->{$opt} is not set, or if any sub-option other than
# 'fstrim_cloned_disks' differs between the current and the new value (this covers added,
# changed and removed sub-options). Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my %hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $requested = parse_guest_agent({ $opt => $value });

    # look at every sub-option present on either side exactly once
    my %visited;
    my @names = grep { !$visited{$_}++ } (keys %$requested, keys %$current);

    for my $name (@names) {
        next if defined($hotpluggable{$name});
        die "skip\n" if safe_string_ne($requested->{$name}, $current->{$name});
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5407
# Apply a changed drive option ($opt with new value $value) to a VM.
#
# If the drive already exists in $conf:
#  - changing the media type is refused with "unable to change media type\n"
#  - a CD-ROM gets its medium ejected and, unless the new file is 'none', the new medium
#    inserted; returns 1
#  - a disk that keeps its volume only gets its I/O throttle limits updated; any difference in
#    aio, discard, iothread, queues, cache, ssd or ro dies with "skip\n"; returns 1
#  - a disk that gets a different volume is unplugged and registered as unused (dies with
#    "skip\n" if $hotplug is not set), then the new volume is plugged as described below
#
# A new drive is plugged via vm_deviceplug(). This dies with "skip\n" if $hotplug is not set
# or $opt matches ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);
    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only_keys = qw(aio discard iothread queues cache ssd ro);
            die "skip\n"
                if grep { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only_keys;

            # apply throttle
            my @mbps_keys = qw(mbps mbps_rd mbps_wr);
            my @iops_keys = qw(iops iops_rd iops_wr);
            my @mbps_max_keys = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max_keys = qw(iops_max iops_rd_max iops_wr_max);
            my @length_keys = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my @throttle_keys =
                (@mbps_keys, @iops_keys, @mbps_max_keys, @iops_max_keys, @length_keys);

            if (grep { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_keys) {
                # positional arguments, in the order of the key lists above; unset mbps/iops
                # values are passed as 0, unset *_length values as 1
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @mbps_keys),
                    (map { $drive->{$_} || 0 } @iops_keys),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @mbps_max_keys),
                    (map { $drive->{$_} || 0 } @iops_max_keys),
                    (map { $drive->{$_} || 1 } @length_keys),
                );
            }

            return 1;
        }

        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}])
        if $drive->{file} !~ m|^/dev/.+|;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5517
9687287b
AD
# Apply the cloud-init config of a VM and, if the VM is running, re-insert the cloud-init
# medium.
#
# Does nothing if the config contains no cloud-init drive. If several volumes qualify, the
# last one visited by foreach_volume() is used. The config is written if
# apply_cloudinit_config() returns a true value.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($key, $candidate) = @_;
        if (PVE::QemuServer::drive_is_cloudinit($candidate)) {
            ($ci_key, $ci_drive) = ($key, $candidate);
        }
    });

    return if !$ci_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        # swap the medium in the running VM: eject first, then insert the volume's path again
        if (my $path = PVE::Storage::path($storecfg, $ci_drive->{file})) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$path");
        }
    }
}
5548
# called in locked context by incoming migration
#
# Collects the volumes of $conf that are candidates for an NBD export. CD-ROM drives,
# 'tpmstate0', drives without a volume and volumes on storages marked as shared are left out.
#
# Returns a hash reference mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 if the volume ID is listed in $replicated_volumes, 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5575
# called in locked context by incoming migration
#
# Prepares the target volumes for the NBD exports of an incoming migration.
#
# $source_volumes maps a drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing, $format]
# ($format is optional). Entries with $use_existing set are re-used as they are and are
# marked with 'replicated' in the result. For all other entries a new volume is allocated for
# $vmid, on the storage selected through $storagemap.
#
# Returns a hash reference mapping the drive key to { drivestr, volid [, replicated] }.
#
# NOTE: for newly allocated volumes the $drive hash taken from $source_volumes is updated in
# place ('format' and 'file'), so the caller's data structure changes as well.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # The requested format is missing or not usable on the target storage. Drop it,
                # otherwise a defined but unsupported value would survive the '//=' below and
                # be handed to vdisk_alloc instead of the storage's default format.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # presumably converts the configured size to the unit vdisk_alloc expects - TODO confirm
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # same hash as $drive, not a copy (see NOTE in the header)
        my $newdrive = $drive;
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;
        my $drivestr = print_drive($newdrive);

        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5627
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Starts a VM while holding the config lock and returns the result of vm_start_nolock().
#
# If the config has a 'backup' lock and the VM is already running, the VM is resumed via
# vm_resume() instead. Otherwise an already running VM is an error. $params->{resume} is set
# here, based on the 'suspended' lock and the presence of 'vmstate' in the config.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        # a 'suspended' lock is expected when resuming, so it must not block the start
        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $disks = vm_migrate_get_nbd_disks(
                $storecfg, $conf, $migrate_opts->{replicated_volumes});
            $migrate_opts->{nbd} = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);

            # let the in-memory config point at the volumes used on this side
            for my $opt (keys $migrate_opts->{nbd}->%*) {
                $conf->{$opt} = $migrate_opts->{nbd}->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5671
5672
0c498cca
FG
5673# params:
5674# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5675# skiplock => 0/1, skip checking for config lock
4ef13a7f 5676# skiptemplate => 0/1, skip checking whether VM is template
7bd9abd2 5677# forcemachine => to force QEMU machine (rollback/migration)
58c64ad5 5678# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
0c498cca
FG
5679# timeout => in seconds
5680# paused => start VM in paused state (backup)
3898a563 5681# resume => resume from hibernation
5921764c
SR
5682# pbs-backing => {
5683# sata0 => {
5684# repository
5685# snapshot
5686# keyfile
5687# archive
5688# },
5689# virtio2 => ...
5690# }
0c498cca 5691# migrate_opts:
ba5396b5 5692# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
0c498cca
FG
5693# migratedfrom => source node
5694# spice_ticket => used for spice migration, passed via tunnel/stdin
5695# network => CIDR of migration network
5696# type => secure/insecure - tunnel over encrypted connection or plain-text
0c498cca 5697# nbd_proto_version => int, 0 for TCP, 1 for UNIX
fd95d780 5698# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
13d121d7
FE
5699# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5700# contained in config
3898a563
FG
5701sub vm_start_nolock {
5702 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
1e3baf05 5703
3898a563
FG
5704 my $statefile = $params->{statefile};
5705 my $resume = $params->{resume};
3dcb98d5 5706
3898a563
FG
5707 my $migratedfrom = $migrate_opts->{migratedfrom};
5708 my $migration_type = $migrate_opts->{type};
7ceade4c 5709
84da8217
FG
5710 my $res = {};
5711
3898a563
FG
5712 # clean up leftover reboot request files
5713 eval { clear_reboot_request($vmid); };
5714 warn $@ if $@;
1e3baf05 5715
3898a563
FG
5716 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5717 vmconfig_apply_pending($vmid, $conf, $storecfg);
5718 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5719 }
64457ed4 5720
3de134ef
WB
5721 # don't regenerate the ISO if the VM is started as part of a live migration
5722 # this way we can reuse the old ISO with the correct config
4b785da1
WB
5723 if (!$migratedfrom) {
5724 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5725 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5726 # $conf->{cloudinit}, so we could just not do this?
5727 # But we do it above, so for now let's be consistent.
5728 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5729 }
5730 }
3de134ef 5731
13d121d7
FE
5732 # override offline migrated volumes, conf is out of date still
5733 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5734 for my $key (sort keys $offline_volumes->%*) {
5735 my $parsed = parse_drive($key, $conf->{$key});
5736 $parsed->{file} = $offline_volumes->{$key};
5737 $conf->{$key} = print_drive($parsed);
5738 }
fd95d780
FG
5739 }
5740
3898a563 5741 my $defaults = load_defaults();
0c9a7596 5742
3898a563 5743 # set environment variable useful inside network script
eef93bc5
FG
5744 # for remote migration the config is available on the target node!
5745 if (!$migrate_opts->{remote_node}) {
5746 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5747 }
6c47d546 5748
3898a563 5749 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
9e784b11 5750
3898a563 5751 my $forcemachine = $params->{forcemachine};
ea1c2110 5752 my $forcecpu = $params->{forcecpu};
3898a563 5753 if ($resume) {
ea1c2110 5754 # enforce machine and CPU type on suspended vm to ensure HW compatibility
3898a563 5755 $forcemachine = $conf->{runningmachine};
ea1c2110 5756 $forcecpu = $conf->{runningcpu};
3898a563
FG
5757 print "Resuming suspended VM\n";
5758 }
7ceade4c 5759
9b71c34d 5760 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5921764c 5761 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
6c47d546 5762
3898a563
FG
5763 my $migration_ip;
5764 my $get_migration_ip = sub {
5765 my ($nodename) = @_;
b24e1ac2 5766
3898a563 5767 return $migration_ip if defined($migration_ip);
b24e1ac2 5768
3898a563 5769 my $cidr = $migrate_opts->{network};
0c498cca 5770
3898a563
FG
5771 if (!defined($cidr)) {
5772 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5773 $cidr = $dc_conf->{migration}->{network};
5774 }
b24e1ac2 5775
3898a563
FG
5776 if (defined($cidr)) {
5777 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
b24e1ac2 5778
3898a563
FG
5779 die "could not get IP: no address configured on local " .
5780 "node for network '$cidr'\n" if scalar(@$ips) == 0;
b24e1ac2 5781
3898a563
FG
5782 die "could not get IP: multiple addresses configured on local " .
5783 "node for network '$cidr'\n" if scalar(@$ips) > 1;
b24e1ac2 5784
3898a563
FG
5785 $migration_ip = @$ips[0];
5786 }
b24e1ac2 5787
3898a563
FG
5788 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5789 if !defined($migration_ip);
b24e1ac2 5790
3898a563
FG
5791 return $migration_ip;
5792 };
b24e1ac2 5793
3898a563
FG
5794 if ($statefile) {
5795 if ($statefile eq 'tcp') {
05b2a4ae
FG
5796 my $migrate = $res->{migrate} = { proto => 'tcp' };
5797 $migrate->{addr} = "localhost";
3898a563
FG
5798 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5799 my $nodename = nodename();
2de2d6f7 5800
3898a563
FG
5801 if (!defined($migration_type)) {
5802 if (defined($datacenterconf->{migration}->{type})) {
5803 $migration_type = $datacenterconf->{migration}->{type};
5804 } else {
5805 $migration_type = 'secure';
b7a5a225 5806 }
3898a563 5807 }
b7a5a225 5808
3898a563 5809 if ($migration_type eq 'insecure') {
05b2a4ae
FG
5810 $migrate->{addr} = $get_migration_ip->($nodename);
5811 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
3898a563 5812 }
2de2d6f7 5813
3898a563 5814 my $pfamily = PVE::Tools::get_host_address_family($nodename);
05b2a4ae
FG
5815 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5816 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5817 push @$cmd, '-incoming', $migrate->{uri};
3898a563 5818 push @$cmd, '-S';
1c9d54bf 5819
3898a563
FG
5820 } elsif ($statefile eq 'unix') {
5821 # should be default for secure migrations, as an ssh TCP forward
5822 # tunnel is not deterministically and reliably ready and regularly fails
5823 # to set up in time, so use UNIX socket forwards
05b2a4ae
FG
5824 my $migrate = $res->{migrate} = { proto => 'unix' };
5825 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5826 unlink $migrate->{addr};
54323eed 5827
05b2a4ae
FG
5828 $migrate->{uri} = "unix:$migrate->{addr}";
5829 push @$cmd, '-incoming', $migrate->{uri};
3898a563 5830 push @$cmd, '-S';
1c9d54bf 5831
3898a563
FG
5832 } elsif (-e $statefile) {
5833 push @$cmd, '-loadstate', $statefile;
5834 } else {
5835 my $statepath = PVE::Storage::path($storecfg, $statefile);
5836 push @$vollist, $statefile;
5837 push @$cmd, '-loadstate', $statepath;
5838 }
5839 } elsif ($params->{paused}) {
5840 push @$cmd, '-S';
5841 }
5842
1fb1822e
DC
5843 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
5844
9b71c34d
DC
5845 my $pci_reserve_list = [];
5846 for my $device (values $pci_devices->%*) {
5847 next if $device->{mdev}; # we don't reserve for mdev devices
5848 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
1fb1822e
DC
5849 }
5850
1fb1822e 5851 # reserve all PCI IDs before actually doing anything with them
9b71c34d 5852 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
1fb1822e
DC
5853
5854 eval {
bbf96e0f 5855 my $uuid;
1fb1822e
DC
5856 for my $id (sort keys %$pci_devices) {
5857 my $d = $pci_devices->{$id};
9b71c34d
DC
5858 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5859
5860 my $chosen_mdev;
5861 for my $dev ($d->{ids}->@*) {
5862 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5863 if ($d->{mdev}) {
5864 warn $@ if $@;
5865 $chosen_mdev = $info;
5866 last if $chosen_mdev; # if successful, we're done
5867 } else {
5868 die $@ if $@;
5869 }
5870 }
5871
5872 next if !$d->{mdev};
5873 die "could not create mediated device\n" if !defined($chosen_mdev);
5874
5875 # nvidia grid needs the uuid of the mdev as qemu parameter
5876 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5877 if (defined($conf->{smbios1})) {
5878 my $smbios_conf = parse_smbios1($conf->{smbios1});
5879 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
bbf96e0f 5880 }
9b71c34d 5881 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
1fb1822e
DC
5882 }
5883 }
bbf96e0f 5884 push @$cmd, '-uuid', $uuid if defined($uuid);
1fb1822e
DC
5885 };
5886 if (my $err = $@) {
1b189121 5887 eval { cleanup_pci_devices($vmid, $conf) };
1fb1822e
DC
5888 warn $@ if $@;
5889 die $err;
3898a563 5890 }
1e3baf05 5891
3898a563 5892 PVE::Storage::activate_volumes($storecfg, $vollist);
1e3baf05 5893
728404c0
TL
5894
5895 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
3d79cf55 5896 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
728404c0 5897 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
3898a563
FG
5898 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5899 # timeout should be more than enough here...
39abafc8 5900 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
3898a563 5901
6b7ef5e5 5902 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
3898a563 5903
3898a563
FG
5904 my %run_params = (
5905 timeout => $statefile ? undef : $start_timeout,
5906 umask => 0077,
5907 noerr => 1,
5908 );
1e3baf05 5909
3898a563
FG
5910 # when migrating, prefix QEMU output so other side can pick up any
5911 # errors that might occur and show the user
5912 if ($migratedfrom) {
5913 $run_params{quiet} = 1;
5914 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5915 }
8bf30c2a 5916
212220a4 5917 my %systemd_properties = (
3898a563 5918 Slice => 'qemu.slice',
354e61aa
SR
5919 KillMode => 'process',
5920 SendSIGKILL => 0,
5921 TimeoutStopUSec => ULONG_MAX, # infinity
3898a563 5922 );
7023f3ea 5923
6cbd3eb8 5924 if (PVE::CGroup::cgroup_mode() == 2) {
212220a4 5925 $systemd_properties{CPUWeight} = $cpuunits;
6cbd3eb8 5926 } else {
212220a4 5927 $systemd_properties{CPUShares} = $cpuunits;
6cbd3eb8
AD
5928 }
5929
3898a563 5930 if (my $cpulimit = $conf->{cpulimit}) {
212220a4 5931 $systemd_properties{CPUQuota} = int($cpulimit * 100);
3898a563 5932 }
212220a4 5933 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
7023f3ea 5934
3898a563
FG
5935 my $run_qemu = sub {
5936 PVE::Tools::run_fork sub {
212220a4 5937 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
6e0216d8 5938
f9dde219
SR
5939 my $tpmpid;
5940 if (my $tpm = $conf->{tpmstate0}) {
5941 # start the TPM emulator so QEMU can connect on start
5942 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5943 }
5944
3898a563 5945 my $exitcode = run_command($cmd, %run_params);
f9dde219 5946 if ($exitcode) {
23bee97d
FE
5947 if ($tpmpid) {
5948 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5949 kill 'TERM', $tpmpid;
5950 }
f9dde219
SR
5951 die "QEMU exited with code $exitcode\n";
5952 }
503308ed 5953 };
3898a563 5954 };
503308ed 5955
3898a563 5956 if ($conf->{hugepages}) {
7023f3ea 5957
3898a563 5958 my $code = sub {
dafb728c
AD
5959 my $hotplug_features =
5960 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5961 my $hugepages_topology =
5962 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5963
3898a563 5964 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
7023f3ea 5965
3898a563
FG
5966 PVE::QemuServer::Memory::hugepages_mount();
5967 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
7023f3ea 5968
503308ed 5969 eval { $run_qemu->() };
3898a563 5970 if (my $err = $@) {
f36e9894
SR
5971 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5972 if !$conf->{keephugepages};
3898a563
FG
5973 die $err;
5974 }
77cde36b 5975
f36e9894
SR
5976 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5977 if !$conf->{keephugepages};
3898a563
FG
5978 };
5979 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
1e3baf05 5980
3898a563
FG
5981 } else {
5982 eval { $run_qemu->() };
5983 }
afdb31d5 5984
3898a563
FG
5985 if (my $err = $@) {
5986 # deactivate volumes if start fails
5987 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
1b189121
DC
5988 warn $@ if $@;
5989 eval { cleanup_pci_devices($vmid, $conf) };
5990 warn $@ if $@;
1fb1822e 5991
3898a563
FG
5992 die "start failed: $err";
5993 }
62de2cbd 5994
1fb1822e
DC
5995 # re-reserve all PCI IDs now that we can know the actual VM PID
5996 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
9b71c34d 5997 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
1fb1822e
DC
5998 warn $@ if $@;
5999
05b2a4ae
FG
6000 if (defined($res->{migrate})) {
6001 print "migration listens on $res->{migrate}->{uri}\n";
6002 } elsif ($statefile) {
3898a563
FG
6003 eval { mon_cmd($vmid, "cont"); };
6004 warn $@ if $@;
6005 }
2189246c 6006
3898a563 6007 #start nbd server for storage migration
13cfe3b7 6008 if (my $nbd = $migrate_opts->{nbd}) {
3898a563 6009 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
2189246c 6010
3898a563
FG
6011 my $migrate_storage_uri;
6012 # nbd_protocol_version > 0 for unix socket support
eef93bc5 6013 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
3898a563
FG
6014 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
6015 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
6016 $migrate_storage_uri = "nbd:unix:$socket_path";
05b2a4ae 6017 $res->{migrate}->{unix_sockets} = [$socket_path];
3898a563
FG
6018 } else {
6019 my $nodename = nodename();
6020 my $localip = $get_migration_ip->($nodename);
6021 my $pfamily = PVE::Tools::get_host_address_family($nodename);
6022 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
6023
4df98f2f
TL
6024 mon_cmd($vmid, "nbd-server-start", addr => {
6025 type => 'inet',
6026 data => {
6027 host => "${localip}",
6028 port => "${storage_migrate_port}",
6029 },
6030 });
3898a563
FG
6031 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
6032 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
2189246c
AD
6033 }
6034
83f04be3
FE
6035 my $block_info = mon_cmd($vmid, "query-block");
6036 $block_info = { map { $_->{device} => $_ } $block_info->@* };
6037
13cfe3b7 6038 foreach my $opt (sort keys %$nbd) {
ba5396b5
FG
6039 my $drivestr = $nbd->{$opt}->{drivestr};
6040 my $volid = $nbd->{$opt}->{volid};
83f04be3
FE
6041
6042 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
6043
6044 mon_cmd(
6045 $vmid,
6046 "block-export-add",
6047 id => "drive-$opt",
6048 'node-name' => $block_node,
6049 writable => JSON::true,
6050 type => "nbd",
6051 name => "drive-$opt", # NBD export name
6052 );
6053
84da8217
FG
6054 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
6055 print "storage migration listens on $nbd_uri volume:$drivestr\n";
ba5396b5
FG
6056 print "re-using replicated volume: $opt - $volid\n"
6057 if $nbd->{$opt}->{replicated};
84da8217
FG
6058
6059 $res->{drives}->{$opt} = $nbd->{$opt};
6060 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
3898a563
FG
6061 }
6062 }
a89fded1 6063
3898a563
FG
6064 if ($migratedfrom) {
6065 eval {
6066 set_migration_caps($vmid);
6067 };
6068 warn $@ if $@;
6069
6070 if ($spice_port) {
6071 print "spice listens on port $spice_port\n";
84da8217 6072 $res->{spice_port} = $spice_port;
3898a563 6073 if ($migrate_opts->{spice_ticket}) {
4df98f2f
TL
6074 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6075 $migrate_opts->{spice_ticket});
3898a563 6076 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
95a4b4a9 6077 }
3898a563 6078 }
95a4b4a9 6079
3898a563
FG
6080 } else {
6081 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6082 if !$statefile && $conf->{balloon};
25088687 6083
3898a563
FG
6084 foreach my $opt (keys %$conf) {
6085 next if $opt !~ m/^net\d+$/;
6086 my $nicconf = parse_net($conf->{$opt});
6087 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
e18b0b99 6088 }
21947fea 6089 add_nets_bridge_fdb($conf, $vmid);
3898a563 6090 }
a1b7d579 6091
4044ae1f 6092 if (!defined($conf->{balloon}) || $conf->{balloon}) {
cb64a643
FE
6093 eval {
6094 mon_cmd(
6095 $vmid,
6096 'qom-set',
6097 path => "machine/peripheral/balloon0",
6098 property => "guest-stats-polling-interval",
6099 value => 2
6100 );
6101 };
6102 log_warn("could not set polling interval for ballooning - $@") if $@;
4044ae1f 6103 }
eb065317 6104
3898a563
FG
6105 if ($resume) {
6106 print "Resumed VM, removing state\n";
6107 if (my $vmstate = $conf->{vmstate}) {
6108 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6109 PVE::Storage::vdisk_free($storecfg, $vmstate);
7ceade4c 6110 }
ea1c2110 6111 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
3898a563
FG
6112 PVE::QemuConfig->write_config($vmid, $conf);
6113 }
7ceade4c 6114
3898a563 6115 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
84da8217
FG
6116
6117 return $res;
1e3baf05
DM
6118}
6119
# Return the QEMU command line for a VM as a single string.
#
# $storecfg: storage configuration, handed through to config_to_command()
# $vmid:     ID of the VM whose configuration is loaded
# $snapname: optional snapshot name; if set, the snapshot's configuration is
#            used instead of the current one
#
# Dies if $snapname is given but no such snapshot exists in the config.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my %forced;
    if ($snapname) {
        my $snap = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap);

        # the snapshot may pin the machine type and CPU it was taken with
        @forced{qw(machine cpu)} = ($snap->{runningmachine}, $snap->{runningcpu});

        # keep file digest for API
        $snap->{digest} = $conf->{digest};

        $conf = $snap;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forced{machine}, $forced{cpu});

    return PVE::Tools::cmd2string($cmd);
}
6145
# Send a 'system_reset' monitor command to the VM while holding the config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the lock check on the loaded configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) unless $skiplock;
        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $do_reset);
}
6158
# Collect the storage-managed volume IDs referenced by a VM configuration.
#
# Volumes given as absolute paths and IDs that parse_volume_id() cannot map to
# a storage are left out.
#
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;
    my $collect = sub {
        my ($volid) = @_;

        # absolute paths are not managed by a storage
        return if $volid =~ m|^/|;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    };
    foreach_volid($conf, $collect);

    return \@volids;
}
6176
1b189121
DC
# Remove mediated devices created for the VM's hostpciN entries and drop the
# VM's PCI reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration hash
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^hostpci(\d+)$/ or next;

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for 10 seconds and then try it
        sleep 10 if $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $mdev_dir = "/sys/bus/mdev/devices/$uuid";
        PVE::SysFSTools::file_write("$mdev_dir/remove", "1") if -e $mdev_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6201
# Best-effort cleanup after a VM has stopped. Any error is only warned about.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration hash
# $keepActive:            if true, volumes are neither deactivated nor unmapped
# $apply_pending_changes: if true, pending config changes are applied at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now. VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes an
            # issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6239
575d19da
DC
# call only in locked context
#
# Stop or shut down a running VM and clean up afterwards.
#
# $storecfg:   storage configuration (used for the cleanup)
# $vmid:       ID of the VM
# $skiplock:   skip the config lock check
# $nocheck:    do not load the VM configuration (no lock check, no hookscript,
#              no cleanup)
# $timeout:    seconds to wait for the VM to exit; defaults to the 'down' value
#              of the startup option on shutdown (if set), else to 60
# $shutdown:   request a guest shutdown instead of sending 'quit'
# $force:      on failure or timeout, terminate with SIGTERM instead of dying
# $keepActive: handed through to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    unless ($nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Poll once per second until the VM is gone or $limit polls have passed.
    # Returns true if the limit was reached.
    my $wait_timed_out = sub {
        my ($limit) = @_;
        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$wait_timed_out->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6321
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $migratedfrom set, the process is sent SIGTERM and cleaned up using the
# config loaded for that node, without taking the config lock. Otherwise the
# stop is delegated to _do_vm_stop() under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forceful unless the caller decided otherwise
    $force //= 1 unless $shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
    return;
}
6342
165411f0
DC
# Reboot a running VM: register a reboot request, then shut the VM down under
# the config lock. If anything fails, the reboot request is cleared again and
# the error is re-thrown.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, handed through to _do_vm_stop()
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6365
# Suspend a VM. Without $includestate the guest is only paused via the 'stop'
# monitor command. With $includestate the VM state is saved to a state volume
# and QEMU is quit afterwards.
#
# $vmid:         ID of the VM
# $skiplock:     skip the config lock check
# $includestate: save the VM state to disk
# $statestorage: storage for the state volume; if unset, one is looked up via
#                find_vmstate_storage()
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the locked sections and the state saving in between
    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
        } else {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);

                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state saving has finished or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                unless PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6460
a20dc58a
FG
# Resume a paused or suspended VM under the config lock.
#
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# $skiplock skips the config lock check (which is also skipped with $nocheck
# or while a 'backup' lock is held).
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    # load the config, trying both possible locations when $nocheck is set
    my $load_conf = sub {
        return PVE::QemuConfig->load_config($vmid) if !$nocheck;

        my $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
        return $conf if !$@;

        my $vmlist = PVE::Cluster::get_vmlist();
        if (exists($vmlist->{ids}->{$vmid})) {
            my $node = $vmlist->{ids}->{$vmid}->{node};
            $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
        }
        if (!$conf) {
            PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
            $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
        }
        return $conf;
    };

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $conf = $load_conf->();

        my $status = $res->{status} || '';
        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $status eq 'shutdown';

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6511
5fdbe4f0
DM
# Send a key (combination) to the VM through the human monitor.
#
# $vmid:     ID of the VM
# $skiplock: accepted, but not used by this function
# $key:      key name as understood by the monitor's 'sendkey' command
#
# Dies with the monitor's output if the command printed anything.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # the loaded config is not used further here
        # NOTE(review): presumably this load asserts that the VM exists - confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output unless $output eq '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
6524
d6deb7f6
TL
# Check that $authuser may use the bridge/VLAN settings of every netN entry
# in $conf. 'root@pam' is always allowed.
#
# Returns 1; the per-network check is delegated to
# PVE::GuestHelpers::check_vnet_access().
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = sort grep { m/^net\d+$/ } keys $conf->%*;
    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        my ($bridge, $tag, $trunks) = $net->@{'bridge', 'tag', 'trunks'};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
    }

    return 1;
}
6538
e3971865
DC
# Check that $user may use the USB and PCI passthrough entries in $conf.
#
# Entries that name a host device directly are restricted to 'root@pam'
# (except USB 'spice' entries); mapped devices require 'Mapping.Use' on the
# mapping. Dies if a usbN/hostpciN entry sets neither 'host' nor 'mapping'.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    my $is_root = $user eq 'root@pam';

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            if (my $host = $usb->{host}) {
                # spice USB redirection is not a real host device
                next if $is_root || $host =~ m/^spice$/i;
                die "only root can set '$opt' config for real devices\n";
            }
            die "either 'host' or 'mapping' must be set.\n" if !$usb->{mapping};
            $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            if ($pci->{host}) {
                next if $is_root;
                die "only root can set '$opt' config for non-mapped devices\n";
            }
            die "either 'host' or 'mapping' must be set.\n" if !$pci->{mapping};
            $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
        }
    }
}
6565
e3971865
DC
# Run the permission checks needed to restore a guest with the given config:
# bridge access first, then USB/PCI mapping access.
sub check_restore_permissions {
    my @check_args = @_[0 .. 2]; # ($rpcenv, $user, $conf)

    check_bridge_access(@check_args);
    return check_mapping_access(@check_args);
}
3e16d5fc
DM
# vzdump restore implementation
6573
# Return the name of the first member of a tar archive.
#
# $archive: path to the archive; must be a plain file
#
# Runs 'tar tf' on the archive, reads the first line of its listing and then
# terminates the tar process. Dies if the file does not exist, if tar cannot
# be started, or if the listing is empty.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";
    my $firstfile = <$fh>;

    # only the first entry is needed, so do not wait for the full listing
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6591
ed221350
DM
# Remove the temporary volumes recorded in the stat file of a tar restore.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path to a file whose lines end in 'vzdump:<name>:<volid>'
#
# Absolute paths are unlinked directly, everything else is freed through
# PVE::Storage::vdisk_free(). Failures are reported on STDERR per volume and
# do not abort the cleanup. Nothing is cleaned up if the stat file cannot be
# opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parenthesize so that the check applies to unlink's
                        # result, not to its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6617
# Restore a VM from a backup archive, dispatching on the archive format.
#
# $archive: path to the archive, or '-' (handed straight to the VMA restore)
# $vmid:    ID of the VM to restore to
# $user:    user the restore runs as
# $opts:    restore options; $opts->{format} overrides the detected format
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    my @restore_args = ($archive, $vmid, $user, $opts);

    return restore_vma_archive(@restore_args) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);

    # an explicitly requested format wins over the detected one
    my $format = defined($opts->{format}) ? $opts->{format} : $info->{format};

    return restore_tar_archive(@restore_args) if $format eq 'tar';

    return restore_vma_archive(@restore_args, $info->{compression});
}
6635
d1e92cf6
DM
# helper to remove disks that will not be used after restore
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM being restored; only volumes owned by it are touched
# $oldconf:      configuration of the VM that is being replaced
# $virtdev_hash: drives contained in the backup, keyed by drive name
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_disks;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        # kept disks lose their snapshots as well
        for my $volid (keys %kept_disks) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6681
9f3d73bc
DM
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;
        die "Content type 'images' is not available on storage '$storeid'\n"
            unless $scfg->{content}->{images};
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
            unless $user eq 'root@pam';
    };

    my $virtdev_hash = {};
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage overrides the one from the hint
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
d1e92cf6 6749
9f3d73bc
DM
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set, and its 'format' replaced
# by the storage's default format if the requested one is not supported there.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # round up
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format as file name suffix
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
d1e92cf6 6793
# Rewrite a single line of a backed-up VM configuration for the restored VM.
#
# $cookie: state shared between calls; $cookie->{netcount} numbers the netN
#          entries generated from old 'vlanN' lines
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the text to write to the new config; this is the empty string for
# lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are not restored
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        my $res = '';
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $res .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $res;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives excluded from backup are kept as a comment only
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are passed through
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);
        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $raw) = ($1, $2);
        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);
        my $smbios1 = parse_smbios1($raw);
        $smbios1->{uuid} = $uuid_str;
        return $prefix.print_smbios1($smbios1)."\n";
    }

    return $line;
}
9f3d73bc
DM
6862
# Deactivate all volumes allocated for a restore. Errors are only printed to
# STDERR.
#
# $storecfg:     storage configuration
# $virtdev_hash: restore device hash; entries without a 'volid' are skipped
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @vollist = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@vollist); };
    print STDERR $@ if $@;
};
6874
# Free all volumes allocated for a restore. Each volume is handled on its own;
# a failure is reported on STDERR and does not stop the remaining ones.
#
# $storecfg:     storage configuration
# $virtdev_hash: restore device hash; entries without a 'volid' are skipped
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
91bd6c90 6887
# Parse the raw configuration from a backup and overlay it with overrides.
#
# $filename:        config file name, handed to parse_vm_config()
# $backup_conf_raw: raw configuration text from the backup
# $override_conf:   hash of options that replace the backed-up values
#
# Returns the merged configuration hash.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    # overrides win over whatever the backup contained
    my @override_keys = keys $override_conf->%*;
    $merged->@{@override_keys} = $override_conf->@{@override_keys};

    return $merged;
}
202a2a0b 6898
# List the image volumes known to the storage layer, indexed by volume ID.
#
# $cfg:  storage configuration
# $vmid: handed to PVE::Storage::vdisk_list() as filter
#
# Entries without a volid or size are skipped; each remaining entry gets its
# 'path' filled in.
#
# Returns { $volid => $item }.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %by_volid;
    for my $items (values %$info) {
        for my $item (@$items) {
            next unless $item->{volid} && $item->{size};
            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $by_volid{$item->{volid}} = $item;
        }
    }

    return \%by_volid;
}
6915
# Synchronize the disk entries of a VM configuration with the volumes found
# on the storages.
#
# $vmid:       ID of the VM (used for messages and to skip state volumes)
# $conf:       VM configuration hash, modified in place
# $volid_hash: { $volid => $info } of existing volumes, with 'size' and 'path'
#
# Updates recorded disk sizes, drops unusedN entries whose volume is in use,
# and adds volumes nothing refers to as new unused entries.
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid; # used and unused disks
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path{$path} = 1;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};
        my $path = $volume ? $volume->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # add volumes nothing refers to yet as unused
    foreach my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6989
# Scan the storages for volumes and update the disk entries of VM configs.
#
# Parameters:
#   $vmid   - VM to update; when undefined, every VM from config_list()
#   $nolock - when true, the config is updated without taking the config lock
#   $dryrun - when true, changes are printed but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $storecfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $found_volumes = scan_volids($storecfg, $vmid);

    # update a single VM config from the scan result
    my $update_vm = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);
        PVE::QemuConfig->check_lock($conf);

        # only pass on the volumes whose info names this VM as owner
        my $own_volumes = {};
        for my $volid (keys %$found_volumes) {
            my $info = $found_volumes->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $id);
            $own_volumes->{$volid} = $info;
        }

        my $changed = update_disk_config($id, $conf, $own_volumes);

        PVE::QemuConfig->write_config($id, $conf) if $changed && !$dryrun;
    };

    # run the update either directly or under the config lock
    my $run_update = sub {
        my ($id) = @_;
        return $update_vm->($id) if $nolock;
        return PVE::QemuConfig->lock_config($id, $update_vm, $id);
    };

    if (defined($vmid)) {
        $run_update->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $run_update->($id);
        }
    }
}
7033
9f3d73bc
DM
# Restore the Proxmox Backup Server snapshot referenced by the volume ID
# $archive as VM $vmid.
#
# Parameters:
#   $archive - volume ID of the backup; must parse as vtype 'backup' with
#              format 'pbs-vm', otherwise the sub dies
#   $vmid    - ID of the VM to create or overwrite
#   $user    - user the restore is performed for (passed to the backup hint
#              parser and the permission check)
#   $options - hash reference; the keys read here are 'live', 'unique',
#              'override_conf' and 'pool' (the whole hash is also handed to
#              the backup hint parser)
#
# The password and (if configured) fingerprint of the storage are exported
# via PBS_PASSWORD / PBS_FINGERPRINT for the duration of the call. On error
# the allocated volumes are deactivated and destroyed and the error is
# re-raised. With $options->{live} set, only 'efidisk0' and 'tpmstate0' are
# restored up front and the remaining drives are handed to pbs_live_restore().
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal separators of the archive name, so they
            # must be escaped to not match arbitrary characters
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace argument is the same for every drive
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed up config to build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7250
# Start VM $vmid paused with its drives backed by a PBS snapshot, then stream
# the data of every drive in $restored_disks to its target volume while the
# VM is running.
#
# Parameters:
#   $vmid           - ID of the VM to start
#   $conf           - VM config; read to look up the drive of each disk
#   $storecfg       - storage configuration
#   $restored_disks - hash reference whose keys are device names of the form
#                     'drive-<config key>'
#   $opts           - hash reference with 'repo', 'snapshot' and optionally
#                     'keyfile' and 'namespace'
#
# Dies with "live-restore failed" after stopping the VM if anything goes
# wrong once the restore has been started, and dies up front if a key of
# $restored_disks does not have the expected 'drive-' prefix.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $keyfile = $opts->{keyfile};
    my $use_keyfile = defined($keyfile) && -e $keyfile;

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never rely on $1 without checking that the match succeeded, it
        # would still hold the capture of an earlier, unrelated match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if $use_keyfile;
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7321
91bd6c90
DM
# Restore the VMA backup $archive ('-' reads from STDIN) as VM $vmid.
#
# Parameters:
#   $archive - path of the archive or '-'
#   $vmid    - ID of the VM to create or overwrite
#   $user    - user the restore is performed for
#   $opts    - hash reference; 'bwlimit', 'unique', 'override_conf' and
#              'pool' are read here (the hash is also passed to the backup
#              hint parser)
#   $comp    - compression of the archive, if any
#
# A pipeline of optional 'cstream' (read rate limit), optional decompressor
# and 'vma extract' is run. The device map is sent to 'vma' through a fifo as
# soon as its output shows that the config was extracted. Allocated volumes
# are destroyed and the error is re-raised if the restore fails.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    # what the next command of the pipeline has to read from
    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $bwlimit = $opts->{bwlimit};

    my $commands = [];
    my $dbg_cmdstring = '';

    # append a command to the pipeline; everything after it reads from stdin
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = $archive eq '-' ? '<&STDIN' : undef;
    if ($archive ne '-') {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                $add_pipe->(['cstream', '-t', $readlimit*1024, '--', $readfrom]);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the target volumes, write the device map to the fifo and
    # collect the new config text
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the restore rate limit once per target storage
        for my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            next if $devinfo->{$devname}->{virtdev};
            die "found no device mapping information for device '$devname'\n";
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros =
                PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # handle one line of 'vma extract' output
        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";

                # restore the alarm that was pending before this call
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);

                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7538
# Restore the tar based backup $archive ('-' reads from STDIN) as VM $vmid.
#
# Parameters:
#   $archive - path of the archive or '-'
#   $vmid    - ID of the VM to create or overwrite
#   $user    - user the restore is performed for (exported as VZDUMP_USER)
#   $opts    - hash reference; 'override_conf', 'storage', 'pool',
#              'prealloc', 'info' and 'unique' are read here
#
# Dies if any override_conf option is given, if a non-STDIN archive does not
# start with 'qemu-server.conf', or if the extraction fails. The temporary
# directory is removed on success as well as on failure.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, the code after the
        # eval (including the config write) still runs - confirm that this
        # is intended for the 'info' option
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the cleanup needs the stat file, so it has to run before the
        # temporary directory is removed
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7651
# Call $func once for every storage ID referenced by a non-cdrom volume of
# $conf. The storage IDs are visited in sorted order and passed as the only
# argument.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # the second argument is passed so unparsable volume IDs yield no
        # storage ID instead of raising an error - presumably, TODO confirm
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    });

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7671
6c9f59c1
TL
# storage types for which QEMU itself takes the snapshot
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether a snapshot of $volid has to be done by QEMU.
#
# Returns 1 if the storage type is listed in $qemu_snap_storage and 'krbd' is
# not set for the storage, or if the volume ID ends in '.qcow2' or '.qed'.
# Returns nothing otherwise, including for any device ID containing
# 'tpmstate0'. Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    return 1 if $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7694
# Check whether the guest agent of VM $vmid answers a 'guest-ping' monitor
# command (issued with a timeout of 3).
#
# Returns 1 on success and 0 on failure. A warning with the error is emitted
# on failure unless $nowarn is true.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;
    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7705
04a69bb4
AD
# Convert the volumes of VM $vmid to base volumes.
#
# Every non-cdrom volume of $conf is handled, or only the one with config key
# $disk if that is given. Volumes without the 'template' storage feature are
# skipped. The config is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        # point the drive at the new base volume and persist that right away
        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7726
92bdc3f0
DC
# Translate a path of the form 'iscsi://<portal>/<target>/<lun>' into the
# corresponding option string (iscsi file driver over tcp, raw format), using
# the initiator name returned by get_initiator_name().
#
# Dies if $path does not have that form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
        "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
}
7743
# Copy a volume with 'qemu-img convert'.
#
# Parameters:
#   $src_volid           - source volume ID, or path of a file/block device
#   $dst_volid           - destination volume ID (has to be a valid volid)
#   $size                - size used to print the amount transferred
#   $snapname            - optional snapshot of the source to copy from
#   $is_zero_initialized - if true, a non-iSCSI target is written through
#                          the 'zeroinit:' prefix
#   $bwlimit             - optional rate limit, passed as '<value>K' to '-r'
#
# Dies if source or destination cannot be resolved or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage managed volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, guess format from extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');

    my $src_is_qcow2 = $src_format && $src_format eq "qcow2";
    push @cmd, '-l', "snapshot.name=$snapname" if $snapname && $src_is_qcow2;
    push @cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @cmd, '-T', $cachemode if defined($cachemode);
    push @cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    # iSCSI paths are passed as image option strings instead of a format
    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    my $target = (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;
    push @cmd, $src_path, $target;

    # turn the progress output of qemu-img into 'transferred' lines
    my $parser = sub {
        my $line = shift;
        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7826
# Determine the image format of volume $volname on the storage described by
# $scfg.
#
# If the storage config has a 'path' and the volume name ends in a file
# extension matching $PVE::QemuServer::Drive::QEMU_FORMAT_RE, that extension
# is returned. In every other case the format is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return $1
        if $scfg->{path} && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7836
# Start a 'drive-mirror' job for drive $drive of VM $vmid and hand the job
# over to qemu_drive_mirror_monitor().
#
# Parameters:
#   $vmid                - ID of the source VM
#   $drive               - config key of the drive, the device is "drive-$drive"
#   $dst_volid           - target volume ID, or a target starting with 'nbd:'
#   $vmiddst             - passed through to qemu_drive_mirror_monitor()
#   $is_zero_initialized - if true, a non-NBD target gets the 'zeroinit:' prefix
#   $jobs                - hash reference of block jobs, the new job is added
#   $completion, $qga    - passed through to qemu_drive_mirror_monitor()
#   $bwlimit             - optional limit, the job speed is $bwlimit * 1024
#   $src_bitmap          - optional dirty bitmap for an incremental mirror
#
# If the job cannot be started, the jobs in $jobs are cancelled and the sub
# dies with a "mirroring error".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        $qemu_target = $dst_volid;
        $format = "nbd";
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for drive-$drive\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7888
db1f8b39
FG
# Poll the block jobs listed in $jobs once per second until they are done and
# finish them as requested by $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $vmid    - VM the QMP commands are sent to
# $vmiddst - if set and different from $vmid, the jobs are cancelled (not
#            completed) once ready, while the guest is frozen or suspended
# $jobs    - hash of jobs keyed by QEMU device; finished jobs are removed from it
# $qga     - if true and the guest agent is running, fs-freeze is used instead of suspend
# $op      - block job type to look at, defaults to "mirror"
#
# Dies with "block job ($op) error: ..." on any failure, after trying to cancel
# the remaining jobs.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us finishing it was cancelled by someone else
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log progress until the job was seen ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {
                    # the QMP command only depends on $completion, so pick it once
                    my $qmp_cmd;
                    if ($completion eq 'complete') {
                        $qmp_cmd = 'block-job-complete';
                    } elsif ($completion eq 'cancel') {
                        $qmp_cmd = 'block-job-cancel';
                    } else {
                        die "invalid completion value: $completion\n";
                    }

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other failure is a real error and must not be
                            # reported (and recorded) as a successful completion
                            die "$job_id: block job cannot be completed - $err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best effort cleanup, the original error is what gets reported
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
8030
# Cancel all block jobs listed in $jobs and wait until QEMU no longer reports them.
#
# $vmid - VM the QMP commands are sent to
# $jobs - hash keyed by QEMU device name; every entry is flagged with
#         'cancel' and removed once the job is gone
#
# Errors of the 'block-job-cancel' command itself are ignored; the function
# polls 'query-block-jobs' once per second until $jobs is empty.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my $running_jobs = {};
        $running_jobs->{$_->{device}} = $_ for @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($running_jobs->{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !keys %$jobs;

        sleep 1;
    }
}
8061
8fbae1dc
FE
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via 'aio', or via the
# storage default) while the target storage does not allow io_uring by default.
# Does nothing if drive-mirror is not used or the storage does not change.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    # Only drive-mirror is affected. When not changing storage, assume that
    # what works for the source will work for the target too.
    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
        if !storage_allows_io_uring_default($dst_scfg, $cache_direct);
}
8088
# Create a linked or full clone of a single drive and return the new drive hash.
#
# $storecfg   - storage configuration
# $source     - hash with: vmid, running, drivename, drive (parsed drive), snapname
# $dest       - hash with: vmid, drivename, efisize, storage, format
# $full       - true for a full clone, false for a linked clone
# $newvollist - array ref; the newly allocated volume ID is pushed onto it
# $jobs, $completion, $qga, $bwlimit - passed on to the mirror/convert helpers
#
# A full clone of a running VM's current disk state uses drive-mirror, all other
# full clones copy the data offline. Cloudinit disks are only allocated, no data
# is copied for them.
#
# Returns a deep copy of the source drive hash with 'file' (and 'size', if it
# can be queried) pointing to the new volume and 'format' removed.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my ($name, $size);
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }

        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloudinit disks.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);

            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_drivename eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # the size is only recorded if it can be queried, errors are ignored here
    my $new_size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
8208
98cfd8b6
AD
# Ask the running QEMU instance of $vmid for its version via 'query-version'
# and return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $qemu = $res->{qemu};

    return join('.', $qemu->{major}, $qemu->{minor});
}
8214
249c4a6c
AD
# Decide whether the old (non-EFI) PXE/BIOS files have to be used for a machine type.
#
# $machine_type - machine type string, optionally with a '.pxe' suffix
#
# Returns nothing if no machine type is given. Otherwise returns the list
# ($use_old_bios_files, $machine_type), with the '.pxe' suffix stripped from
# the machine type if it was present (which forces the old files).
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    return (!min_version($version, 2, 4), $machine_type);
}
8236
# Return the size in bytes of the OVMF vars file that matches the VM config.
#
# $conf    - VM configuration
# $efidisk - optional parsed EFI disk; if not given it is parsed from
#            $conf->{efidisk0} when that is set
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # second element of the returned list is the path to the vars file
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $ovmf_vars;
}
8246
# Set the 'size' property of the 'efidisk0' entry in $conf to the value
# returned by get_efivars_size(). Does nothing if there is no EFI disk.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_str = $conf->{efidisk0};
    return if !defined($efidisk_str);

    my $disk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_str);
    $disk->{size} = get_efivars_size($conf);
    $conf->{efidisk0} = print_drive($disk);

    return;
}
8258
f9dde219
SR
# Set the 'size' property of the 'tpmstate0' entry in $conf to
# TPMSTATE_DISK_SIZE. Does nothing if there is no TPM state entry.
sub update_tpmstate_size {
    my ($conf) = @_;

    # same guard as update_efidisk_size(): without it an undefined value would
    # be parsed and a bogus 'tpmstate0' entry written back into the config
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8266
90b20b15
DC
# Allocate a new EFI vars volume and fill it with the matching OVMF vars template.
#
# $storecfg, $storeid, $vmid, $fmt - where and how to allocate the volume
# $arch, $efidisk, $smm            - select the OVMF files via get_ovmf_files()
#
# Returns the list ($volid, $size_kb), where the size is the queried volume
# size divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # second element of the returned list is the path to the vars template
    my $vars_template = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $template_bytes = -s $vars_template;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $template_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_template, $volid, $template_bytes, undef, 0);

    my $volume_bytes = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_bytes/1024);
}
8282
22de899a
AD
# Query the iothreads of a running VM via 'query-iothreads'.
#
# Returns a hash ref mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        $thread_id_of{ $entry->{id} } = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8295
ee034f5c
AD
# Compute SCSI controller information for a drive.
#
# $conf  - VM configuration; only 'scsihw' is looked at
# $drive - parsed drive; only 'index' is looked at
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 for no/lsi* scsihw,
#                        1 for 'virtio-scsi-single', 256 otherwise
#   $controller        - int(index / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev =
        (!$scsihw || $scsihw =~ m/^lsi/) ? 7
        : $is_single                     ? 1
        :                                  256;

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
a1511b3c 8316
44549149
EK
# Pick the image format to use for a new volume on storage $storeid.
#
# $format      - requested format, may be empty
# $src_volname - optional source volume name, used as hint if no format is requested
#
# Returns the requested (or hinted) format if the storage lists it as valid,
# and the storage's default format otherwise.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        # without a format and without a source volume there is nothing to go by
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$validFormats;

    return $supported ? $format : $defFormat;
}
8336
66cebc46
DC
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage to put the VM state on. Order of preference:
#   1. 'vmstatestorage' from the config, if set
#   2. a shared storage where the VM has at least one disk
#   3. a non-shared storage where the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' replaces an earlier pick.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # prefer file based storage
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
8360
# Generate a new UUID and return it in its string representation.
sub generate_uuid {
    my $uuid;
    UUID::generate($uuid);

    my $uuid_str;
    UUID::unparse($uuid, $uuid_str);

    return $uuid_str;
}
8367
# Return a freshly generated UUID as 'uuid=<UUID>' string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=" . $uuid;
}
8371
9c152e87
TL
# Send the 'nbd-server-stop' QMP command to the VM and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8377
dae98db9
DC
# Create the (empty) reboot trigger file for a VM below /run/qemu-server.
# Dies if the file cannot be created.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";

    return close($fh);
}
8384
# Remove the reboot trigger file of a VM.
#
# Returns the result of unlink(): true if a file was removed, false if there
# was none. Dies on any unlink error other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $res = unlink("/run/qemu-server/$vmid.reboot");

    # a missing file simply means there was no pending request
    if (!$res && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $res;
}
8396
5cfa9f5f
SR
# Translate the legacy boot setting (a string of single-letter device classes)
# into a hash mapping device names to boot indices.
#
# $conf    - VM configuration
# $bootcfg - optional parsed 'boot' property; its 'legacy' value is used if
#            set, else the default from $boot_fmt
#
# The n-th letter of the legacy string gets the base index n*100. Letter 'd'
# covers CD-ROM drives, 'c' the disk named by 'bootdisk', 'n' the net devices.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per device class letter
    my @letters = split(//, $boot);
    my %next_index = map { ($letters[$_] => ($_ + 1) * 100) } 0 .. $#letters;

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            $bootorder->{$ds} = $next_index{d}++ if $next_index{d};
        } elsif ($next_index{c}) {
            # only the configured bootdisk is bootable, but every disk uses up an index
            $bootorder->{$ds} = $next_index{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n}++;
        }
    }

    return $bootorder;
}
8436
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with, in this order and each only if present: the first
# hard disk, the first CD-ROM and the first configured net device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk first (0), then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network
    my $netname = first { $conf->{$_} } map { "net$_" } 0 .. $MAX_NETS - 1;
    push @devices, $netname if defined($netname);

    return \@devices;
}
8464
e5d611c3
TL
# Compute the boot order of a VM as hash mapping device names to boot indices.
#
# Without a 'boot' property, or with a legacy one, the result comes from
# bootorder_from_legacy(). With 'order=' the listed devices are numbered
# starting at 100. Returns an empty hash if neither is set.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        return bootorder_from_legacy($conf, $boot);
    }

    my $bootorder = {};
    return $bootorder if !$boot->{order};

    my $index = 100; # start at 100 to allow user to insert devices before us with -args
    for my $dev (PVE::Tools::split_list($boot->{order})) {
        $bootorder->{$dev} = $index++;
    }

    return $bootorder;
}
8484
65911545
SR
# Connect to the qmeventd socket and send the 'vzdump' handshake for $vmid.
#
# The connect is retried (with 25ms pause) while it fails with EINTR or EAGAIN,
# for at most 5 attempts; any other error is fatal immediately.
#
# Returns the socket handle; the caller has to close it later.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $fh;
    my $count = 0;
    while (1) {
        $count++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$fh} to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8512
65e866e5
DM
8513# bash completion helper
8514
# Completion helper: list the volume IDs of VMA backup archives.
#
# $cvalue - current (partial) value; if it starts with '<storage>:' only that
#           storage is searched
#
# Returns an array ref of volume IDs.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undef (= all storages) unless the value already contains a storage prefix
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            if (
                $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/
                && defined($item->{volid})
            ) {
                push @$res, $item->{volid};
            }
        }
    }

    return $res;
}
8538
# Completion helper: list VMIDs as reported by vmstatus().
#
# $running - undef: all guests; true: only running non-template guests;
#            false (but defined): only non-running non-template guests
#
# Returns an array ref of VMIDs.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    return [ keys %$idlist ] if !defined($running);

    my @matching = grep {
        my $d = $idlist->{$_};
        !$d->{template}
            && ($running ? $d->{status} eq 'running' : $d->{status} ne 'running');
    } keys %$idlist;

    return \@matching;
};
8558
# Completion helper: all VMIDs.
sub complete_vmid {
    return $complete_vmid_full->();
}

# Completion helper: VMIDs of guests that are not running (templates excluded).
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}

# Completion helper: VMIDs of running guests (templates excluded).
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8570
335af808
DM
# Completion helper: list the IDs of all storages that pass
# storage_check_enabled() and have the 'images' content type configured.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    for my $sid (keys %$ids) {
        push @$res, $sid
            if PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1)
            && $ids->{$sid}->{content}->{images};
    }

    return $res;
}
8585
255e9c54
AL
# Completion helper: list the IDs of all storages that pass
# storage_check_enabled() for the target node and have the 'images' content
# type configured.
#
# $all_args - array ref of arguments; the second element is taken as the
#             target node
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    for my $sid (keys %$ids) {
        push @$res, $sid
            if PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1)
            && $ids->{$sid}->{content}->{images};
    }

    return $res;
}
8603
b08c37c3
DC
# Check whether the VM reports the status "paused" via 'query-status'.
#
# Errors (missing config on this node, QMP failure) are only warned about and
# lead to a false return value.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
8613
3f11f0d7
LS
# Assert that the storage of volume $vol has the volume's content type enabled.
#
# Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    # first element of the parsed volume name is the content type
    my $vtype = (PVE::Storage::parse_volname($storecfg, $vol))[0];

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8626
21947fea
AD
# Add forwarding database entries for all vNICs ('netN' entries) of a guest.
#
# $conf - VM configuration
# $vmid - VM ID, used to derive the tap interface name "tap<vmid>i<N>"
#
# vNICs without MAC address or without bridge are skipped (with a warning where
# that is a problem). With SDN the SDN helper is used; otherwise entries are
# only added if the bridge has a 'bridge' directory in sysfs.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^net(\d+)$/;
        next if !defined($index);

        my $iface = "tap${vmid}i${index}";
        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my ($mac, $bridge) = $net->@{qw(macaddr bridge)};

        if (!$mac) {
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !file_read_firstline("/sys/class/net/$iface/brport/learning");
            next;
        }

        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
1b5ba4dd 8655
73ed6496
AD
# Remove the forwarding database entries for all vNICs ('netN' entries) of a guest.
#
# $conf - VM configuration
# $vmid - VM ID, used to derive the tap interface name "tap<vmid>i<N>"
#
# vNICs that cannot be parsed, have no MAC address or are not attached to a
# bridge are skipped. With SDN the SDN helper is used; otherwise entries are
# only removed if the bridge has a 'bridge' directory in sysfs.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # add_nets_bridge_fdb() never registers anything for a vNIC without
        # bridge, and an undefined name must not be interpolated into the sysfs
        # path below or passed on to the SDN helper
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8674
1e3baf05 86751;