X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=c3b682cdd99c8199b16771dc32115dabb9627850;hb=dea4b04c1cf77414dea50c54f2f3f89b7ff71388;hp=2b68d81fe1d7b32cb44ccab37ad51dcdb415276a;hpb=4df15a03770c571ad190674d694b3e029422aeab;p=qemu-server.git diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 2b68d81..c3b682c 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -26,10 +26,10 @@ use Time::HiRes qw(gettimeofday); use URI::Escape; use UUID; -use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file cfs_lock_file); +use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file); use PVE::DataCenterConfig; use PVE::Exception qw(raise raise_param_exc); -use PVE::GuestHelpers; +use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne); use PVE::INotify; use PVE::JSONSchema qw(get_standard_option); use PVE::ProcFSTools; @@ -37,18 +37,26 @@ use PVE::RPCEnvironment; use PVE::Storage; use PVE::SysFSTools; use PVE::Systemd; -use PVE::Tools qw(run_command lock_file lock_file_full file_read_firstline dir_glob_foreach get_host_arch $IPV6RE); +use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE); use PVE::QMPClient; use PVE::QemuConfig; -use PVE::QemuServer::Helpers qw(min_version); +use PVE::QemuServer::Helpers qw(min_version config_aware_timeout); use PVE::QemuServer::Cloudinit; +use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options); +use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom parse_drive print_drive); use PVE::QemuServer::Machine; use PVE::QemuServer::Memory; use PVE::QemuServer::Monitor qw(mon_cmd); -use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port); +use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci); use PVE::QemuServer::USB qw(parse_usb_device); +my $have_sdn; +eval { + require PVE::Network::SDN::Zones; + $have_sdn = 1; +}; + my 
$EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/'; my $OVMF = { x86_64 => [ @@ -63,8 +71,6 @@ my $OVMF = { my $cpuinfo = PVE::ProcFSTools::read_cpuinfo(); -my $QEMU_FORMAT_RE = qr/raw|cow|qcow|qcow2|qed|vmdk|cloop/; - # Note about locking: we use flock on the config file protect # against concurent actions. # Aditionaly, we have a 'lock' setting in the config file. This @@ -83,13 +89,6 @@ PVE::JSONSchema::register_standard_option('pve-qm-stateuri', { optional => 1, }); -PVE::JSONSchema::register_standard_option('pve-qm-image-format', { - type => 'string', - enum => [qw(raw cow qcow qed qcow2 vmdk cloop)], - description => "The drive's backing file's data format.", - optional => 1, -}); - PVE::JSONSchema::register_standard_option('pve-qemu-machine', { description => "Specifies the Qemu machine type.", type => 'string', @@ -98,6 +97,28 @@ PVE::JSONSchema::register_standard_option('pve-qemu-machine', { optional => 1, }); + +sub map_storage { + my ($map, $source) = @_; + + return $source if !defined($map); + + return $map->{entries}->{$source} + if $map->{entries} && defined($map->{entries}->{$source}); + + return $map->{default} if $map->{default}; + + # identity (fallback) + return $source; +} + +PVE::JSONSchema::register_standard_option('pve-targetstorage', { + description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. 
Providing the special value '1' will map each source storage to itself.", + type => 'string', + format => 'storagepair-list', + optional => 1, +}); + #no warnings 'redefine'; sub cgroups_write { @@ -114,108 +135,6 @@ sub nodename { return $nodename_cache; } -my $cpu_vendor_list = { - # Intel CPUs - 486 => 'GenuineIntel', - pentium => 'GenuineIntel', - pentium2 => 'GenuineIntel', - pentium3 => 'GenuineIntel', - coreduo => 'GenuineIntel', - core2duo => 'GenuineIntel', - Conroe => 'GenuineIntel', - Penryn => 'GenuineIntel', - Nehalem => 'GenuineIntel', - 'Nehalem-IBRS' => 'GenuineIntel', - Westmere => 'GenuineIntel', - 'Westmere-IBRS' => 'GenuineIntel', - SandyBridge => 'GenuineIntel', - 'SandyBridge-IBRS' => 'GenuineIntel', - IvyBridge => 'GenuineIntel', - 'IvyBridge-IBRS' => 'GenuineIntel', - Haswell => 'GenuineIntel', - 'Haswell-IBRS' => 'GenuineIntel', - 'Haswell-noTSX' => 'GenuineIntel', - 'Haswell-noTSX-IBRS' => 'GenuineIntel', - Broadwell => 'GenuineIntel', - 'Broadwell-IBRS' => 'GenuineIntel', - 'Broadwell-noTSX' => 'GenuineIntel', - 'Broadwell-noTSX-IBRS' => 'GenuineIntel', - 'Skylake-Client' => 'GenuineIntel', - 'Skylake-Client-IBRS' => 'GenuineIntel', - 'Skylake-Server' => 'GenuineIntel', - 'Skylake-Server-IBRS' => 'GenuineIntel', - 'Cascadelake-Server' => 'GenuineIntel', - KnightsMill => 'GenuineIntel', - - - # AMD CPUs - athlon => 'AuthenticAMD', - phenom => 'AuthenticAMD', - Opteron_G1 => 'AuthenticAMD', - Opteron_G2 => 'AuthenticAMD', - Opteron_G3 => 'AuthenticAMD', - Opteron_G4 => 'AuthenticAMD', - Opteron_G5 => 'AuthenticAMD', - EPYC => 'AuthenticAMD', - 'EPYC-IBPB' => 'AuthenticAMD', - - # generic types, use vendor from host node - host => 'default', - kvm32 => 'default', - kvm64 => 'default', - qemu32 => 'default', - qemu64 => 'default', - max => 'default', -}; - -my @supported_cpu_flags = ( - 'pcid', - 'spec-ctrl', - 'ibpb', - 'ssbd', - 'virt-ssbd', - 'amd-ssbd', - 'amd-no-ssb', - 'pdpe1gb', - 'md-clear', - 'hv-tlbflush', - 'hv-evmcs', - 'aes' -); 
-my $cpu_flag = qr/[+-](@{[join('|', @supported_cpu_flags)]})/; - -my $cpu_fmt = { - cputype => { - description => "Emulated CPU type.", - type => 'string', - enum => [ sort { "\L$a" cmp "\L$b" } keys %$cpu_vendor_list ], - default => 'kvm64', - default_key => 1, - }, - hidden => { - description => "Do not identify as a KVM virtual machine.", - type => 'boolean', - optional => 1, - default => 0 - }, - 'hv-vendor-id' => { - type => 'string', - pattern => qr/[a-zA-Z0-9]{1,12}/, - format_description => 'vendor-id', - description => 'The Hyper-V vendor ID. Some drivers or programs inside Windows guests need a specific ID.', - optional => 1, - }, - flags => { - description => "List of additional CPU flags separated by ';'." - . " Use '+FLAG' to enable, '-FLAG' to disable a flag." - . " Currently supported flags: @{[join(', ', @supported_cpu_flags)]}.", - format_description => '+FLAG[;-FLAG...]', - type => 'string', - pattern => qr/$cpu_flag(;$cpu_flag)*/, - optional => 1, - }, -}; - my $watchdog_fmt = { model => { default_key => 1, @@ -320,6 +239,43 @@ my $spice_enhancements_fmt = { }, }; +my $rng_fmt = { + source => { + type => 'string', + enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'], + default_key => 1, + description => "The file on the host to gather entropy from. In most" + . " cases /dev/urandom should be preferred over /dev/random" + . " to avoid entropy-starvation issues on the host. Using" + . " urandom does *not* decrease security in any meaningful" + . " way, as it's still seeded from real entropy, and the" + . " bytes provided will most likely be mixed with real" + . " entropy on the guest as well. /dev/hwrng can be used" + . " to pass through a hardware RNG from the host.", + }, + max_bytes => { + type => 'integer', + description => "Maximum bytes of entropy injected into the guest every" + . " 'period' milliseconds. Prefer a lower value when using" + . " /dev/random as source. Use 0 to disable limiting" + . 
" (potentially dangerous!).", + optional => 1, + + # default is 1 KiB/s, provides enough entropy to the guest to avoid + # boot-starvation issues (e.g. systemd etc...) while allowing no chance + # of overwhelming the host, provided we're reading from /dev/urandom + default => 1024, + }, + period => { + type => 'integer', + description => "Every 'period' milliseconds the entropy-injection quota" + . " is reset, allowing the guest to retrieve another" + . " 'max_bytes' of entropy.", + optional => 1, + default => 1000, + }, +}; + my $confdesc = { onboot => { optional => 1, @@ -611,7 +567,7 @@ EODESCR optional => 1, description => "Emulated CPU type.", type => 'string', - format => $cpu_fmt, + format => 'pve-vm-cpu-conf', }, parent => get_standard_option('pve-snapshot-name', { optional => 1, @@ -633,8 +589,15 @@ EODESCR optional => 1, }), runningmachine => get_standard_option('pve-qemu-machine', { - description => "Specifies the Qemu machine type of the running vm. This is used internally for snapshots.", + description => "Specifies the QEMU machine type of the running vm. This is used internally for snapshots.", }), + runningcpu => { + description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used internally for snapshots.", + optional => 1, + type => 'string', + pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re, + format_description => 'QEMU -cpu parameter' + }, machine => get_standard_option('pve-qemu-machine'), arch => { description => "Virtual processor architecture. Defaults to the host.", @@ -708,6 +671,12 @@ EODESCR description => 'Tags of the VM. 
This is only meta information.', optional => 1, }, + rng0 => { + type => 'string', + format => $rng_fmt, + description => "Configure a VirtIO-based Random Number Generator.", + optional => 1, + }, }; my $cicustom_fmt = { @@ -797,14 +766,8 @@ while (my ($k, $v) = each %$confdesc) { PVE::JSONSchema::register_standard_option("pve-qm-$k", $v); } -my $MAX_IDE_DISKS = 4; -my $MAX_SCSI_DISKS = 14; -my $MAX_VIRTIO_DISKS = 16; -my $MAX_SATA_DISKS = 6; my $MAX_USB_DEVICES = 5; my $MAX_NETS = 32; -my $MAX_UNUSED_DISKS = 256; -my $MAX_HOSTPCI_DEVICES = 16; my $MAX_SERIAL_PORTS = 4; my $MAX_PARALLEL_PORTS = 3; my $MAX_NUMA = 8; @@ -882,6 +845,7 @@ my $net_fmt = { type => 'string', description => $net_fmt_bridge_descr, format_description => 'bridge', + pattern => '[-_.\w\d]+', optional => 1, }, queues => { @@ -919,6 +883,12 @@ my $net_fmt = { description => 'Whether this interface should be disconnected (like pulling the plug).', optional => 1, }, + mtu => { + type => 'integer', + minimum => 1, maximum => 65520, + description => "Force MTU, for VirtIO only. 
Set to '1' to use the bridge MTU", + optional => 1, + }, }; my $netdesc = { @@ -1006,310 +976,6 @@ sub verify_volume_id_or_qm_path { return $volid; } -my $drivename_hash; - -my %drivedesc_base = ( - volume => { alias => 'file' }, - file => { - type => 'string', - format => 'pve-volume-id-or-qm-path', - default_key => 1, - format_description => 'volume', - description => "The drive's backing volume.", - }, - media => { - type => 'string', - enum => [qw(cdrom disk)], - description => "The drive's media type.", - default => 'disk', - optional => 1 - }, - cyls => { - type => 'integer', - description => "Force the drive's physical geometry to have a specific cylinder count.", - optional => 1 - }, - heads => { - type => 'integer', - description => "Force the drive's physical geometry to have a specific head count.", - optional => 1 - }, - secs => { - type => 'integer', - description => "Force the drive's physical geometry to have a specific sector count.", - optional => 1 - }, - trans => { - type => 'string', - enum => [qw(none lba auto)], - description => "Force disk geometry bios translation mode.", - optional => 1, - }, - snapshot => { - type => 'boolean', - description => "Controls qemu's snapshot mode feature." - . " If activated, changes made to the disk are temporary and will" - . " be discarded when the VM is shutdown.", - optional => 1, - }, - cache => { - type => 'string', - enum => [qw(none writethrough writeback unsafe directsync)], - description => "The drive's cache mode", - optional => 1, - }, - format => get_standard_option('pve-qm-image-format'), - size => { - type => 'string', - format => 'disk-size', - format_description => 'DiskSize', - description => "Disk size. 
This is purely informational and has no effect.", - optional => 1, - }, - backup => { - type => 'boolean', - description => "Whether the drive should be included when making backups.", - optional => 1, - }, - replicate => { - type => 'boolean', - description => 'Whether the drive should considered for replication jobs.', - optional => 1, - default => 1, - }, - rerror => { - type => 'string', - enum => [qw(ignore report stop)], - description => 'Read error action.', - optional => 1, - }, - werror => { - type => 'string', - enum => [qw(enospc ignore report stop)], - description => 'Write error action.', - optional => 1, - }, - aio => { - type => 'string', - enum => [qw(native threads)], - description => 'AIO type to use.', - optional => 1, - }, - discard => { - type => 'string', - enum => [qw(ignore on)], - description => 'Controls whether to pass discard/trim requests to the underlying storage.', - optional => 1, - }, - detect_zeroes => { - type => 'boolean', - description => 'Controls whether to detect and try to optimize writes of zeroes.', - optional => 1, - }, - serial => { - type => 'string', - format => 'urlencoded', - format_description => 'serial', - maxLength => 20*3, # *3 since it's %xx url enoded - description => "The drive's reported serial number, url-encoded, up to 20 bytes long.", - optional => 1, - }, - shared => { - type => 'boolean', - description => 'Mark this locally-managed volume as available on all nodes', - verbose_description => "Mark this locally-managed volume as available on all nodes.\n\nWARNING: This option does not share the volume automatically, it assumes it is shared already!", - optional => 1, - default => 0, - } -); - -my %iothread_fmt = ( iothread => { - type => 'boolean', - description => "Whether to use iothreads for this drive", - optional => 1, -}); - -my %model_fmt = ( - model => { - type => 'string', - format => 'urlencoded', - format_description => 'model', - maxLength => 40*3, # *3 since it's %xx url enoded - description 
=> "The drive's reported model name, url-encoded, up to 40 bytes long.", - optional => 1, - }, -); - -my %queues_fmt = ( - queues => { - type => 'integer', - description => "Number of queues.", - minimum => 2, - optional => 1 - } -); - -my %scsiblock_fmt = ( - scsiblock => { - type => 'boolean', - description => "whether to use scsi-block for full passthrough of host block device\n\nWARNING: can lead to I/O errors in combination with low memory or high memory fragmentation on host", - optional => 1, - default => 0, - }, -); - -my %ssd_fmt = ( - ssd => { - type => 'boolean', - description => "Whether to expose this drive as an SSD, rather than a rotational hard disk.", - optional => 1, - }, -); - -my %wwn_fmt = ( - wwn => { - type => 'string', - pattern => qr/^(0x)[0-9a-fA-F]{16}/, - format_description => 'wwn', - description => "The drive's worldwide name, encoded as 16 bytes hex string, prefixed by '0x'.", - optional => 1, - }, -); - -my $add_throttle_desc = sub { - my ($key, $type, $what, $unit, $longunit, $minimum) = @_; - my $d = { - type => $type, - format_description => $unit, - description => "Maximum $what in $longunit.", - optional => 1, - }; - $d->{minimum} = $minimum if defined($minimum); - $drivedesc_base{$key} = $d; -}; -# throughput: (leaky bucket) -$add_throttle_desc->('bps', 'integer', 'r/w speed', 'bps', 'bytes per second'); -$add_throttle_desc->('bps_rd', 'integer', 'read speed', 'bps', 'bytes per second'); -$add_throttle_desc->('bps_wr', 'integer', 'write speed', 'bps', 'bytes per second'); -$add_throttle_desc->('mbps', 'number', 'r/w speed', 'mbps', 'megabytes per second'); -$add_throttle_desc->('mbps_rd', 'number', 'read speed', 'mbps', 'megabytes per second'); -$add_throttle_desc->('mbps_wr', 'number', 'write speed', 'mbps', 'megabytes per second'); -$add_throttle_desc->('iops', 'integer', 'r/w I/O', 'iops', 'operations per second'); -$add_throttle_desc->('iops_rd', 'integer', 'read I/O', 'iops', 'operations per second'); 
-$add_throttle_desc->('iops_wr', 'integer', 'write I/O', 'iops', 'operations per second'); - -# pools: (pool of IO before throttling starts taking effect) -$add_throttle_desc->('mbps_max', 'number', 'unthrottled r/w pool', 'mbps', 'megabytes per second'); -$add_throttle_desc->('mbps_rd_max', 'number', 'unthrottled read pool', 'mbps', 'megabytes per second'); -$add_throttle_desc->('mbps_wr_max', 'number', 'unthrottled write pool', 'mbps', 'megabytes per second'); -$add_throttle_desc->('iops_max', 'integer', 'unthrottled r/w I/O pool', 'iops', 'operations per second'); -$add_throttle_desc->('iops_rd_max', 'integer', 'unthrottled read I/O pool', 'iops', 'operations per second'); -$add_throttle_desc->('iops_wr_max', 'integer', 'unthrottled write I/O pool', 'iops', 'operations per second'); - -# burst lengths -$add_throttle_desc->('bps_max_length', 'integer', 'length of I/O bursts', 'seconds', 'seconds', 1); -$add_throttle_desc->('bps_rd_max_length', 'integer', 'length of read I/O bursts', 'seconds', 'seconds', 1); -$add_throttle_desc->('bps_wr_max_length', 'integer', 'length of write I/O bursts', 'seconds', 'seconds', 1); -$add_throttle_desc->('iops_max_length', 'integer', 'length of I/O bursts', 'seconds', 'seconds', 1); -$add_throttle_desc->('iops_rd_max_length', 'integer', 'length of read I/O bursts', 'seconds', 'seconds', 1); -$add_throttle_desc->('iops_wr_max_length', 'integer', 'length of write I/O bursts', 'seconds', 'seconds', 1); - -# legacy support -$drivedesc_base{'bps_rd_length'} = { alias => 'bps_rd_max_length' }; -$drivedesc_base{'bps_wr_length'} = { alias => 'bps_wr_max_length' }; -$drivedesc_base{'iops_rd_length'} = { alias => 'iops_rd_max_length' }; -$drivedesc_base{'iops_wr_length'} = { alias => 'iops_wr_max_length' }; - -my $ide_fmt = { - %drivedesc_base, - %model_fmt, - %ssd_fmt, - %wwn_fmt, -}; -PVE::JSONSchema::register_format("pve-qm-ide", $ide_fmt); - -my $idedesc = { - optional => 1, - type => 'string', format => $ide_fmt, - description => "Use 
volume as IDE hard disk or CD-ROM (n is 0 to " .($MAX_IDE_DISKS -1) . ").", -}; -PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc); - -my $scsi_fmt = { - %drivedesc_base, - %iothread_fmt, - %queues_fmt, - %scsiblock_fmt, - %ssd_fmt, - %wwn_fmt, -}; -my $scsidesc = { - optional => 1, - type => 'string', format => $scsi_fmt, - description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").", -}; -PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); - -my $sata_fmt = { - %drivedesc_base, - %ssd_fmt, - %wwn_fmt, -}; -my $satadesc = { - optional => 1, - type => 'string', format => $sata_fmt, - description => "Use volume as SATA hard disk or CD-ROM (n is 0 to " . ($MAX_SATA_DISKS - 1). ").", -}; -PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc); - -my $virtio_fmt = { - %drivedesc_base, - %iothread_fmt, -}; -my $virtiodesc = { - optional => 1, - type => 'string', format => $virtio_fmt, - description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").", -}; -PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc); - -my $alldrive_fmt = { - %drivedesc_base, - %iothread_fmt, - %model_fmt, - %queues_fmt, - %scsiblock_fmt, - %ssd_fmt, - %wwn_fmt, -}; - -my $efidisk_fmt = { - volume => { alias => 'file' }, - file => { - type => 'string', - format => 'pve-volume-id-or-qm-path', - default_key => 1, - format_description => 'volume', - description => "The drive's backing volume.", - }, - format => get_standard_option('pve-qm-image-format'), - size => { - type => 'string', - format => 'disk-size', - format_description => 'DiskSize', - description => "Disk size. 
This is purely informational and has no effect.", - optional => 1, - }, -}; - -my $efidisk_desc = { - optional => 1, - type => 'string', format => $efidisk_fmt, - description => "Configure a Disk for storing EFI vars", -}; - -PVE::JSONSchema::register_standard_option("pve-qm-efidisk", $efidisk_desc); - my $usb_fmt = { host => { default_key => 1, @@ -1344,76 +1010,6 @@ my $usbdesc = { }; PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc); -my $PCIRE = qr/([a-f0-9]{4}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/; -my $hostpci_fmt = { - host => { - default_key => 1, - type => 'string', - pattern => qr/$PCIRE(;$PCIRE)*/, - format_description => 'HOSTPCIID[;HOSTPCIID2...]', - description => < { - type => 'boolean', - description => "Specify whether or not the device's ROM will be visible in the guest's memory map.", - optional => 1, - default => 1, - }, - romfile => { - type => 'string', - pattern => '[^,;]+', - format_description => 'string', - description => "Custom pci device rom filename (must be located in /usr/share/kvm/).", - optional => 1, - }, - pcie => { - type => 'boolean', - description => "Choose the PCI-express bus (needs the 'q35' machine model).", - optional => 1, - default => 0, - }, - 'x-vga' => { - type => 'boolean', - description => "Enable vfio-vga device support.", - optional => 1, - default => 0, - }, - 'mdev' => { - type => 'string', - format_description => 'string', - pattern => '[^/\.:]+', - optional => 1, - description => < 1, - type => 'string', format => 'pve-qm-hostpci', - description => "Map host PCI devices into guest.", - verbose_description => < 1, type => 'string', @@ -1452,47 +1048,18 @@ for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) { $confdesc->{"serial$i"} = $serialdesc; } -for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) { - $confdesc->{"hostpci$i"} = $hostpcidesc; -} - -for (my $i = 0; $i < $MAX_IDE_DISKS; $i++) { - $drivename_hash->{"ide$i"} = 1; - $confdesc->{"ide$i"} = $idedesc; -} - -for (my $i = 0; $i < 
$MAX_SATA_DISKS; $i++) { - $drivename_hash->{"sata$i"} = 1; - $confdesc->{"sata$i"} = $satadesc; -} - -for (my $i = 0; $i < $MAX_SCSI_DISKS; $i++) { - $drivename_hash->{"scsi$i"} = 1; - $confdesc->{"scsi$i"} = $scsidesc ; +for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) { + $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc; } -for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++) { - $drivename_hash->{"virtio$i"} = 1; - $confdesc->{"virtio$i"} = $virtiodesc; +for my $key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) { + $confdesc->{$key} = $PVE::QemuServer::Drive::drivedesc_hash->{$key}; } -$drivename_hash->{efidisk0} = 1; -$confdesc->{efidisk0} = $efidisk_desc; - for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) { $confdesc->{"usb$i"} = $usbdesc; } -my $unuseddesc = { - optional => 1, - type => 'string', format => 'pve-volume-id', - description => "Reference to unused volumes. This is used internally, and should not be modified manually.", -}; - -for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) { - $confdesc->{"unused$i"} = $unuseddesc; -} - my $kvm_api_version = 0; sub kvm_version { @@ -1541,21 +1108,6 @@ sub kernel_has_vhost_net { return -c '/dev/vhost-net'; } -sub valid_drive_names { - # order is important - used to autoselect boot disk - return ((map { "ide$_" } (0 .. ($MAX_IDE_DISKS - 1))), - (map { "scsi$_" } (0 .. ($MAX_SCSI_DISKS - 1))), - (map { "virtio$_" } (0 .. ($MAX_VIRTIO_DISKS - 1))), - (map { "sata$_" } (0 .. 
($MAX_SATA_DISKS - 1))), - 'efidisk0'); -} - -sub is_valid_drivename { - my $dev = shift; - - return defined($drivename_hash->{$dev}); -} - sub option_exists { my $key = shift; return defined($confdesc->{$key}); @@ -1672,110 +1224,21 @@ sub pve_verify_hotplug_features { die "unable to parse hotplug option\n"; } -# ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]] -# [,snapshot=on|off][,cache=on|off][,format=f][,backup=yes|no] -# [,rerror=ignore|report|stop][,werror=enospc|ignore|report|stop] -# [,aio=native|threads][,discard=ignore|on][,detect_zeroes=on|off] -# [,iothread=on][,serial=serial][,model=model] - -sub parse_drive { - my ($key, $data) = @_; +sub scsi_inquiry { + my($fh, $noerr) = @_; - my ($interface, $index); + my $SG_IO = 0x2285; + my $SG_GET_VERSION_NUM = 0x2282; - if ($key =~ m/^([^\d]+)(\d+)$/) { - $interface = $1; - $index = $2; - } else { + my $versionbuf = "\x00" x 8; + my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf); + if (!$ret) { + die "scsi ioctl SG_GET_VERSION_NUM failoed - $!\n" if !$noerr; return undef; } - - my $desc = $key =~ /^unused\d+$/ ? 
$alldrive_fmt - : $confdesc->{$key}->{format}; - if (!$desc) { - warn "invalid drive key: $key\n"; - return undef; - } - my $res = eval { PVE::JSONSchema::parse_property_string($desc, $data) }; - return undef if !$res; - $res->{interface} = $interface; - $res->{index} = $index; - - my $error = 0; - foreach my $opt (qw(bps bps_rd bps_wr)) { - if (my $bps = defined(delete $res->{$opt})) { - if (defined($res->{"m$opt"})) { - warn "both $opt and m$opt specified\n"; - ++$error; - next; - } - $res->{"m$opt"} = sprintf("%.3f", $bps / (1024*1024.0)); - } - } - - # can't use the schema's 'requires' because of the mbps* => bps* "transforming aliases" - for my $requirement ( - [mbps_max => 'mbps'], - [mbps_rd_max => 'mbps_rd'], - [mbps_wr_max => 'mbps_wr'], - [miops_max => 'miops'], - [miops_rd_max => 'miops_rd'], - [miops_wr_max => 'miops_wr'], - [bps_max_length => 'mbps_max'], - [bps_rd_max_length => 'mbps_rd_max'], - [bps_wr_max_length => 'mbps_wr_max'], - [iops_max_length => 'iops_max'], - [iops_rd_max_length => 'iops_rd_max'], - [iops_wr_max_length => 'iops_wr_max']) { - my ($option, $requires) = @$requirement; - if ($res->{$option} && !$res->{$requires}) { - warn "$option requires $requires\n"; - ++$error; - } - } - - return undef if $error; - - return undef if $res->{mbps_rd} && $res->{mbps}; - return undef if $res->{mbps_wr} && $res->{mbps}; - return undef if $res->{iops_rd} && $res->{iops}; - return undef if $res->{iops_wr} && $res->{iops}; - - if ($res->{media} && ($res->{media} eq 'cdrom')) { - return undef if $res->{snapshot} || $res->{trans} || $res->{format}; - return undef if $res->{heads} || $res->{secs} || $res->{cyls}; - return undef if $res->{interface} eq 'virtio'; - } - - if (my $size = $res->{size}) { - return undef if !defined($res->{size} = PVE::JSONSchema::parse_size($size)); - } - - return $res; -} - -sub print_drive { - my ($drive) = @_; - my $data = { %$drive }; - delete $data->{$_} for qw(index interface); - return 
PVE::JSONSchema::print_property_string($data, $alldrive_fmt); -} - -sub scsi_inquiry { - my($fh, $noerr) = @_; - - my $SG_IO = 0x2285; - my $SG_GET_VERSION_NUM = 0x2282; - - my $versionbuf = "\x00" x 8; - my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf); - if (!$ret) { - die "scsi ioctl SG_GET_VERSION_NUM failoed - $!\n" if !$noerr; - return undef; - } - my $version = unpack("I", $versionbuf); - if ($version < 30000) { - die "scsi generic interface too old\n" if !$noerr; + my $version = unpack("I", $versionbuf); + if ($version < 30000) { + die "scsi generic interface too old\n" if !$noerr; return undef; } @@ -1898,7 +1361,7 @@ sub print_drivedevice_full { $device .= ",wwn=$drive->{wwn}" if $drive->{wwn}; } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') { - my $maxdev = ($drive->{interface} eq 'sata') ? $MAX_SATA_DISKS : 2; + my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2; my $controller = int($drive->{index} / $maxdev); my $unit = $drive->{index} % $maxdev; my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? 
"cd" : "hd"; @@ -1953,7 +1416,7 @@ sub get_initiator_name { return $initiator; } -sub print_drive_full { +sub print_drive_commandline_full { my ($storecfg, $vmid, $drive) = @_; my $path; @@ -2065,6 +1528,22 @@ sub print_netdevice_full { } $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ; + if (my $mtu = $net->{mtu}) { + if ($net->{model} eq 'virtio' && $net->{bridge}) { + my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge}); + if ($mtu == 1) { + $mtu = $bridge_mtu; + } elsif ($mtu < 576) { + die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"; + } elsif ($mtu > $bridge_mtu) { + die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"; + } + $tmpstr .= ",host_mtu=$mtu"; + } else { + warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n"; + } + } + if ($use_old_bios_files) { my $romfile; if ($device eq 'virtio-net-pci') { @@ -2121,26 +1600,6 @@ sub print_netdev_full { return $netdev; } - -sub print_cpu_device { - my ($conf, $id) = @_; - - my $kvm = $conf->{kvm} // 1; - my $cpu = $kvm ? 
"kvm64" : "qemu64"; - if (my $cputype = $conf->{cpu}) { - my $cpuconf = PVE::JSONSchema::parse_property_string($cpu_fmt, $cputype) - or die "Cannot parse cpu description: $cputype\n"; - $cpu = $cpuconf->{cputype}; - } - - my $cores = $conf->{cores} || 1; - - my $current_core = ($id - 1) % $cores; - my $current_socket = int(($id - 1 - $current_core)/$cores); - - return "$cpu-x86_64-cpu,id=cpu$id,socket-id=$current_socket,core-id=$current_core,thread-id=0"; -} - my $vga_map = { 'cirrus' => 'cirrus-vga', 'std' => 'VGA', @@ -2203,20 +1662,6 @@ sub print_vga_device { return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}"; } -sub drive_is_cloudinit { - my ($drive) = @_; - return $drive->{file} =~ m@[:/]vm-\d+-cloudinit(?:\.$QEMU_FORMAT_RE)?$@; -} - -sub drive_is_cdrom { - my ($drive, $exclude_cloudinit) = @_; - - return 0 if $exclude_cloudinit && drive_is_cloudinit($drive); - - return $drive && $drive->{media} && ($drive->{media} eq 'cdrom'); - -} - sub parse_number_sets { my ($set) = @_; my $res = []; @@ -2240,23 +1685,6 @@ sub parse_numa { return $res; } -sub parse_hostpci { - my ($value) = @_; - - return undef if !$value; - - my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value); - - my @idlist = split(/;/, $res->{host}); - delete $res->{host}; - foreach my $id (@idlist) { - my $devs = PVE::SysFSTools::lspci($id); - die "no PCI device found for '$id'\n" if !scalar(@$devs); - push @{$res->{pciid}}, @$devs; - } - return $res; -} - # netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate= sub parse_net { my ($data) = @_; @@ -2426,17 +1854,6 @@ sub print_smbios1 { PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt); -PVE::JSONSchema::register_format('pve-qm-bootdisk', \&verify_bootdisk); -sub verify_bootdisk { - my ($value, $noerr) = @_; - - return $value if is_valid_drivename($value); - - return undef if $noerr; - - die "invalid boot disk '$value'\n"; -} - sub parse_watchdog { my ($value) = @_; @@ -2469,6 +1886,16 @@ sub parse_vga { return 
$res; } +sub parse_rng { + my ($value) = @_; + + return undef if !$value; + + my $res = eval { PVE::JSONSchema::parse_property_string($rng_fmt, $value) }; + warn $@ if $@; + return $res; +} + PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device); sub verify_usb_device { my ($value, $noerr) = @_; @@ -2485,7 +1912,8 @@ sub json_config_properties { my $prop = shift; foreach my $opt (keys %$confdesc) { - next if $opt eq 'parent' || $opt eq 'snaptime' || $opt eq 'vmstate' || $opt eq 'runningmachine'; + next if $opt eq 'parent' || $opt eq 'snaptime' || $opt eq 'vmstate' || + $opt eq 'runningmachine' || $opt eq 'runningcpu'; $prop->{$opt} = $confdesc->{$opt}; } @@ -2544,7 +1972,7 @@ sub destroy_vm { if ($conf->{template}) { # check if any base image is still used by a linked clone - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; return if drive_is_cdrom($drive); @@ -2558,7 +1986,7 @@ sub destroy_vm { } # only remove disks owned by this VM - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; return if drive_is_cdrom($drive, 1); @@ -2847,7 +2275,7 @@ sub check_local_resources { sub check_storage_availability { my ($storecfg, $conf, $node) = @_; - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; my $volid = $drive->{file}; @@ -2870,7 +2298,7 @@ sub shared_nodes { my $nodehash = { map { $_ => 1 } @$nodelist }; my $nodename = nodename(); - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; my $volid = $drive->{file}; @@ -2902,7 +2330,7 @@ sub check_local_storage_availability { my $nodelist = PVE::Cluster::get_nodelist(); my $nodehash = { map { $_ => {} } @$nodelist }; - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; my $volid = $drive->{file}; @@ -2961,26 +2389,6 @@ sub vzlist { return $vzlist; } -sub disksize { - my 
($storecfg, $conf) = @_; - - my $bootdisk = $conf->{bootdisk}; - return undef if !$bootdisk; - return undef if !is_valid_drivename($bootdisk); - - return undef if !$conf->{$bootdisk}; - - my $drive = parse_drive($bootdisk, $conf->{$bootdisk}); - return undef if !defined($drive); - - return undef if drive_is_cdrom($drive); - - my $volid = $drive->{file}; - return undef if !$volid; - - return $drive->{size}; -} - our $vmstatus_return_properties = { vmid => get_standard_option('pve-vmid'), status => { @@ -3068,7 +2476,7 @@ sub vmstatus { # fixme: better status? $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped'; - my $size = disksize($storecfg, $conf); + my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf); if (defined($size)) { $d->{disk} = 0; # no info available $d->{maxdisk} = $size; @@ -3247,65 +2655,6 @@ sub vmstatus { return $res; } -sub foreach_drive { - my ($conf, $func, @param) = @_; - - foreach my $ds (valid_drive_names()) { - next if !defined($conf->{$ds}); - - my $drive = parse_drive($ds, $conf->{$ds}); - next if !$drive; - - &$func($ds, $drive, @param); - } -} - -sub foreach_volid { - my ($conf, $func, @param) = @_; - - my $volhash = {}; - - my $test_volid = sub { - my ($volid, $is_cdrom, $replicate, $shared, $snapname, $size) = @_; - - return if !$volid; - - $volhash->{$volid}->{cdrom} //= 1; - $volhash->{$volid}->{cdrom} = 0 if !$is_cdrom; - - $volhash->{$volid}->{replicate} //= 0; - $volhash->{$volid}->{replicate} = 1 if $replicate; - - $volhash->{$volid}->{shared} //= 0; - $volhash->{$volid}->{shared} = 1 if $shared; - - $volhash->{$volid}->{referenced_in_config} //= 0; - $volhash->{$volid}->{referenced_in_config} = 1 if !defined($snapname); - - $volhash->{$volid}->{referenced_in_snapshot}->{$snapname} = 1 - if defined($snapname); - $volhash->{$volid}->{size} = $size if $size; - }; - - foreach_drive($conf, sub { - my ($ds, $drive) = @_; - $test_volid->($drive->{file}, drive_is_cdrom($drive), $drive->{replicate} // 1, 
$drive->{shared}, undef, $drive->{size}); - }); - - foreach my $snapname (keys %{$conf->{snapshots}}) { - my $snap = $conf->{snapshots}->{$snapname}; - $test_volid->($snap->{vmstate}, 0, 1, $snapname); - foreach_drive($snap, sub { - my ($ds, $drive) = @_; - $test_volid->($drive->{file}, drive_is_cdrom($drive), $drive->{replicate} // 1, $drive->{shared}, $snapname); - }); - } - - foreach my $volid (keys %$volhash) { - &$func($volid, $volhash->{$volid}, @param); - } -} - sub conf_has_serial { my ($conf) = @_; @@ -3336,6 +2685,32 @@ sub conf_has_audio { }; } +sub audio_devs { + my ($audio, $audiopciaddr, $machine_version) = @_; + + my $devs = []; + + my $id = $audio->{dev_id}; + my $audiodev = ""; + if (min_version($machine_version, 4, 2)) { + $audiodev = ",audiodev=$audio->{backend_id}"; + } + + if ($audio->{dev} eq 'AC97') { + push @$devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev"; + } elsif ($audio->{dev} =~ /intel\-hda$/) { + push @$devs, '-device', "$audio->{dev},id=${id}${audiopciaddr}"; + push @$devs, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev"; + push @$devs, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev"; + } else { + die "unkown audio device '$audio->{dev}', implement me!"; + } + + push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}"; + + return $devs; +} + sub vga_conf_has_spice { my ($vga) = @_; @@ -3362,14 +2737,24 @@ my $default_machines = { }; sub get_vm_machine { - my ($conf, $forcemachine, $arch, $add_pve_version) = @_; + my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_; my $machine = $forcemachine || $conf->{machine}; if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) { $arch //= 'x86_64'; $machine ||= $default_machines->{$arch}; - $machine .= "+pve$PVE::QemuServer::Machine::PVE_MACHINE_VERSION" if $add_pve_version; + if ($add_pve_version) { + $kvmversion //= kvm_user_version(); + my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion); + $machine .= 
"+pve$pvever"; + } + } + + if ($add_pve_version && $machine !~ m/\+pve\d+$/) { + # for version-pinned machines that do not include a pve-version (e.g. + # pc-q35-4.1), we assume 0 to keep them stable in case we bump + $machine .= '+pve0'; } return $machine; @@ -3397,63 +2782,127 @@ sub get_command_for_arch($) { return $cmd; } -sub get_cpu_options { - my ($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough) = @_; +# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags +# to use in a QEMU command line (-cpu element), first array_intersect the result +# of query_supported_ with query_understood_. This is necessary because: +# +# a) query_understood_ returns flags the host cannot use and +# b) query_supported_ (rather the QMP call) doesn't actually return CPU +# flags, but CPU settings - with most of them being flags. Those settings +# (and some flags, curiously) cannot be specified as a "-cpu" argument. +# +# query_supported_ needs to start up to 2 temporary VMs and is therefore rather +# expensive. If you need the value returned from this, you can get it much +# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with +# $accel being 'kvm' or 'tcg'. +# +# pvestatd calls this function on startup and whenever the QEMU/KVM version +# changes, automatically populating pmxcfs. +# +# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] } +# since kvm and tcg machines support different flags +# +sub query_supported_cpu_flags { + my ($arch) = @_; + + $arch //= get_host_arch(); + my $default_machine = $default_machines->{$arch}; - my $cpuFlags = []; - my $ostype = $conf->{ostype}; + my $flags = {}; - my $cpu = $kvm ? 
"kvm64" : "qemu64"; - if ($arch eq 'aarch64') { - $cpu = 'cortex-a57'; - } - my $hv_vendor_id; - if (my $cputype = $conf->{cpu}) { - my $cpuconf = PVE::JSONSchema::parse_property_string($cpu_fmt, $cputype) - or die "Cannot parse cpu description: $cputype\n"; - $cpu = $cpuconf->{cputype}; - $kvm_off = 1 if $cpuconf->{hidden}; - $hv_vendor_id = $cpuconf->{'hv-vendor-id'}; + # FIXME: Once this is merged, the code below should work for ARM as well: + # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html + die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if + $arch eq "aarch64"; + + my $kvm_supported = defined(kvm_version()); + my $qemu_cmd = get_command_for_arch($arch); + my $fakevmid = -1; + my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid); - if (defined(my $flags = $cpuconf->{flags})) { - push @$cpuFlags, split(";", $flags); + # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command + my $query_supported_run_qemu = sub { + my ($kvm) = @_; + + my $flags = {}; + my $cmd = [ + $qemu_cmd, + '-machine', $default_machine, + '-display', 'none', + '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server,nowait", + '-mon', 'chardev=qmp,mode=control', + '-pidfile', $pidfile, + '-S', '-daemonize' + ]; + + if (!$kvm) { + push @$cmd, '-accel', 'tcg'; } - } - push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64' && $arch eq 'x86_64'; + my $rc = run_command($cmd, noerr => 1, quiet => 0); + die "QEMU flag querying VM exited with code " . $rc if $rc; - push @$cpuFlags , '-x2apic' if $ostype && $ostype eq 'solaris'; + eval { + my $cmd_result = mon_cmd( + $fakevmid, + 'query-cpu-model-expansion', + type => 'full', + model => { name => 'host' } + ); + + my $props = $cmd_result->{model}->{props}; + foreach my $prop (keys %$props) { + next if $props->{$prop} ne '1'; + # QEMU returns some flags multiple times, with '_', '.' or '-' + # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...). 
+ # We only keep those with underscores, to match /proc/cpuinfo + $prop =~ s/\.|-/_/g; + $flags->{$prop} = 1; + } + }; + my $err = $@; - push @$cpuFlags, '+sep' if $cpu eq 'kvm64' || $cpu eq 'kvm32'; + # force stop with 10 sec timeout and 'nocheck' + # always stop, even if QMP failed + vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1); - push @$cpuFlags, '-rdtscp' if $cpu =~ m/^Opteron/; + die $err if $err; - if (min_version($machine_version, 2, 3) && $arch eq 'x86_64') { + return [ sort keys %$flags ]; + }; - push @$cpuFlags , '+kvm_pv_unhalt' if $kvm; - push @$cpuFlags , '+kvm_pv_eoi' if $kvm; - } + # We need to query QEMU twice, since KVM and TCG have different supported flags + PVE::QemuConfig->lock_config($fakevmid, sub { + $flags->{tcg} = eval { $query_supported_run_qemu->(0) }; + warn "warning: failed querying supported tcg flags: $@\n" if $@; - add_hyperv_enlightenments($cpuFlags, $winversion, $machine_version, $conf->{bios}, $gpu_passthrough, $hv_vendor_id) if $kvm; + if ($kvm_supported) { + $flags->{kvm} = eval { $query_supported_run_qemu->(1) }; + warn "warning: failed querying supported kvm flags: $@\n" if $@; + } + }); - push @$cpuFlags, 'enforce' if $cpu ne 'host' && $kvm && $arch eq 'x86_64'; + return $flags; +} - push @$cpuFlags, 'kvm=off' if $kvm_off; +# Understood CPU flags are written to a file at 'pve-qemu' compile time +my $understood_cpu_flag_dir = "/usr/share/kvm"; +sub query_understood_cpu_flags { + my $arch = get_host_arch(); + my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch"; - if (my $cpu_vendor = $cpu_vendor_list->{$cpu}) { - push @$cpuFlags, "vendor=${cpu_vendor}" - if $cpu_vendor ne 'default'; - } elsif ($arch ne 'aarch64') { - die "internal error"; # should not happen - } + die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n" + if ! -e $filepath; - $cpu .= "," . 
join(',', @$cpuFlags) if scalar(@$cpuFlags); + my $raw = file_get_contents($filepath); + $raw =~ s/^\s+|\s+$//g; + my @flags = split(/\s+/, $raw); - return ('-cpu', $cpu); + return \@flags; } sub config_to_command { - my ($storecfg, $vmid, $conf, $defaults, $forcemachine) = @_; + my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu) = @_; my $cmd = []; my $globalFlags = []; @@ -3462,7 +2911,6 @@ sub config_to_command { my $devices = []; my $pciaddr = ''; my $bridges = {}; - my $vernum = 0; # unknown my $ostype = $conf->{ostype}; my $winversion = windows_version($ostype); my $kvm = $conf->{kvm}; @@ -3472,6 +2920,11 @@ sub config_to_command { my $kvm_binary = get_command_for_arch($arch); my $kvmver = kvm_user_version($kvm_binary); + if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) { + $kvmver //= "undefined"; + die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n"; + } + my $add_pve_version = min_version($kvmver, 4, 1); my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version); @@ -3479,22 +2932,36 @@ sub config_to_command { $kvm //= 1 if is_native($arch); $machine_version =~ m/(\d+)\.(\d+)/; - die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type', please upgrade node '$nodename'\n" - if !PVE::QemuServer::min_version($kvmver, $1, $2); + my ($machine_major, $machine_minor) = ($1, $2); + + if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) { + warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n"; + } elsif (!min_version($kvmver, $machine_major, $machine_minor)) { + die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type', please upgrade node '$nodename'\n" + } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) { + my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version); + die "Installed qemu-server (max feature level for 
$machine_major.$machine_minor is pve$max_pve_version)" + ." is too old to run machine type '$machine_type', please upgrade node '$nodename'\n"; + } + + # if a specific +pve version is required for a feature, use $version_guard + # instead of min_version to allow machines to be run with the minimum + # required version + my $required_pve_version = 0; + my $version_guard = sub { + my ($major, $minor, $pve) = @_; + return 0 if !min_version($machine_version, $major, $minor, $pve); + my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor"); + return 1 if min_version($machine_version, $major, $minor, $max_pve+1); + $required_pve_version = $pve if $pve && $pve > $required_pve_version; + return 1; + }; if ($kvm) { die "KVM virtualisation configured, but not available. Either disable in VM configuration or enable in BIOS.\n" if !defined kvm_version(); } - if ($kvmver =~ m/^(\d+)\.(\d+)$/) { - $vernum = $1*1000000+$2*1000; - } elsif ($kvmver =~ m/^(\d+)\.(\d+)\.(\d+)$/) { - $vernum = $1*1000000+$2*1000+$3; - } - - die "detected old qemu-kvm binary ($kvmver)\n" if $vernum < 15000; - my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf); my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); my $use_old_bios_files = undef; @@ -3549,18 +3016,13 @@ sub config_to_command { } } - if ($conf->{vmgenid}) { - push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid}; - } - - my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch); if ($conf->{bios} && $conf->{bios} eq 'ovmf') { - die "uefi base image not found\n" if ! -f $ovmf_code; + my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch); + die "uefi base image '$ovmf_code' not found\n" if ! 
-f $ovmf_code; - my $path; - my $format; + my ($path, $format); if (my $efidisk = $conf->{efidisk0}) { - my $d = PVE::JSONSchema::parse_property_string($efidisk_fmt, $efidisk); + my $d = parse_drive('efidisk0', $efidisk); my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1); $format = $d->{format}; if ($storeid) { @@ -3581,8 +3043,14 @@ sub config_to_command { $format = 'raw'; } + my $size_str = ""; + + if ($format eq 'raw' && $version_guard->(4, 1, 2)) { + $size_str = ",size=" . (-s $ovmf_vars); + } + push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly,file=$ovmf_code"; - push @$cmd, '-drive', "if=pflash,unit=1,format=$format,id=drive-efidisk0,file=$path"; + push @$cmd, '-drive', "if=pflash,unit=1,format=$format,id=drive-efidisk0$size_str,file=$path"; } # load q35 config @@ -3595,6 +3063,10 @@ sub config_to_command { } } + if ($conf->{vmgenid}) { + push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid}; + } + # add usb controllers my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers($conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES); push @$devices, @usbcontrollers if @usbcontrollers; @@ -3629,77 +3101,9 @@ sub config_to_command { push @$devices, '-device', $kbd if defined($kbd); } - my $kvm_off = 0; - my $gpu_passthrough; - - # host pci devices - for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) { - my $id = "hostpci$i"; - my $d = parse_hostpci($conf->{$id}); - next if !$d; - - if (my $pcie = $d->{pcie}) { - die "q35 machine model is not enabled" if !$q35; - # win7 wants to have the pcie devices directly on the pcie bus - # instead of in the root port - if ($winversion == 7) { - $pciaddr = print_pcie_addr("${id}bus0"); - } else { - # add more root ports if needed, 4 are present by default - # by pve-q35 cfgs, rest added here on demand. 
- if ($i > 3) { - push @$devices, '-device', print_pcie_root_port($i); - } - $pciaddr = print_pcie_addr($id); - } - } else { - $pciaddr = print_pci_addr($id, $bridges, $arch, $machine_type); - } - - my $xvga = ''; - if ($d->{'x-vga'}) { - $xvga = ',x-vga=on' if !($conf->{bios} && $conf->{bios} eq 'ovmf'); - $kvm_off = 1; - $vga->{type} = 'none' if !defined($conf->{vga}); - $gpu_passthrough = 1; - } - - my $pcidevices = $d->{pciid}; - my $multifunction = 1 if @$pcidevices > 1; - - my $sysfspath; - if ($d->{mdev} && scalar(@$pcidevices) == 1) { - my $pci_id = $pcidevices->[0]->{id}; - my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i); - $sysfspath = "/sys/bus/pci/devices/$pci_id/$uuid"; - } elsif ($d->{mdev}) { - warn "ignoring mediated device '$id' with multifunction device\n"; - } - - my $j=0; - foreach my $pcidevice (@$pcidevices) { - my $devicestr = "vfio-pci"; - - if ($sysfspath) { - $devicestr .= ",sysfsdev=$sysfspath"; - } else { - $devicestr .= ",host=$pcidevice->{id}"; - } - - my $mf_addr = $multifunction ? 
".$j" : ''; - $devicestr .= ",id=${id}${mf_addr}${pciaddr}${mf_addr}"; - - if ($j == 0) { - $devicestr .= ',rombar=0' if defined($d->{rombar}) && !$d->{rombar}; - $devicestr .= "$xvga"; - $devicestr .= ",multifunction=on" if $multifunction; - $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile}; - } - - push @$devices, '-device', $devicestr; - $j++; - } - } + # host pci device passthrough + my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices( + $conf, $devices, $winversion, $q35, $bridges, $arch, $machine_type); # usb devices my $usb_dev_features = {}; @@ -3739,22 +3143,10 @@ sub config_to_command { } } - if (my $audio = conf_has_audio($conf)) { - + if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) { my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type); - - my $id = $audio->{dev_id}; - if ($audio->{dev} eq 'AC97') { - push @$devices, '-device', "AC97,id=${id}${audiopciaddr}"; - } elsif ($audio->{dev} =~ /intel\-hda$/) { - push @$devices, '-device', "$audio->{dev},id=${id}${audiopciaddr}"; - push @$devices, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0"; - push @$devices, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1"; - } else { - die "unkown audio device '$audio->{dev}', implement me!"; - } - - push @$devices, '-audiodev', "$audio->{backend},id=$audio->{backend_id}"; + my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version); + push @$devices, @$audio_devs; } my $sockets = 1; @@ -3812,7 +3204,6 @@ sub config_to_command { # time drift fix my $tdf = defined($conf->{tdf}) ? 
$conf->{tdf} : $defaults->{tdf}; - my $useLocaltime = $conf->{localtime}; if ($winversion >= 5) { # windows @@ -3831,21 +3222,17 @@ sub config_to_command { push @$rtcFlags, 'driftfix=slew' if $tdf; - if (!$kvm) { - push @$machineFlags, 'accel=tcg'; - } - - if ($machine_type) { - push @$machineFlags, "type=${machine_type}"; - } - - if (($conf->{startdate}) && ($conf->{startdate} ne 'now')) { + if ($conf->{startdate} && $conf->{startdate} ne 'now') { push @$rtcFlags, "base=$conf->{startdate}"; } elsif ($useLocaltime) { push @$rtcFlags, 'base=localtime'; } - push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough); + if ($forcecpu) { + push @$cmd, '-cpu', $forcecpu; + } else { + push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough); + } PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd); @@ -3868,12 +3255,28 @@ sub config_to_command { } } + my $rng = parse_rng($conf->{rng0}) if $conf->{rng0}; + if ($rng && &$version_guard(4, 1, 2)) { + check_rng_source($rng->{source}); + + my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default}; + my $period = $rng->{period} // $rng_fmt->{period}->{default}; + my $limiter_str = ""; + if ($max_bytes) { + $limiter_str = ",max-bytes=$max_bytes,period=$period"; + } + + my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type); + push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0"; + push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr"; + } + my $spice_port; if ($qxlnum) { if ($qxlnum > 1) { if ($winversion){ - for(my $i = 1; $i < $qxlnum; $i++){ + for (my $i = 1; $i < $qxlnum; $i++){ push @$devices, '-device', print_vga_device($conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges); } } else { @@ -3936,7 +3339,7 @@ sub config_to_command { push @$devices, '-iscsi', "initiator-name=$initiator"; } - 
foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; if (PVE::Storage::parse_volume_id($drive->{file}, 1)) { @@ -3960,14 +3363,17 @@ sub config_to_command { } } - if($drive->{interface} eq 'virtio'){ + if ($drive->{interface} eq 'virtio'){ push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread}; } - if ($drive->{interface} eq 'scsi') { + if ($drive->{interface} eq 'scsi') { my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); + die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n" + if $drive->{index} > 13 && !&$version_guard(4, 1, 2); + $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type); my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw; @@ -3986,37 +3392,37 @@ sub config_to_command { push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues" if !$scsicontroller->{$controller}; $scsicontroller->{$controller}=1; - } + } if ($drive->{interface} eq 'sata') { - my $controller = int($drive->{index} / $MAX_SATA_DISKS); - $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type); - push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr" if !$ahcicontroller->{$controller}; - $ahcicontroller->{$controller}=1; + my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS); + $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type); + push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr" if !$ahcicontroller->{$controller}; + $ahcicontroller->{$controller}=1; } - my $drive_cmd = print_drive_full($storecfg, $vmid, $drive); + my $drive_cmd = print_drive_commandline_full($storecfg, $vmid, $drive); push @$devices, '-drive',$drive_cmd; push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges, $arch, 
$machine_type); }); for (my $i = 0; $i < $MAX_NETS; $i++) { - next if !$conf->{"net$i"}; - my $d = parse_net($conf->{"net$i"}); - next if !$d; + next if !$conf->{"net$i"}; + my $d = parse_net($conf->{"net$i"}); + next if !$d; - $use_virtio = 1 if $d->{model} eq 'virtio'; + $use_virtio = 1 if $d->{model} eq 'virtio'; - if ($bootindex_hash->{n}) { - $d->{bootindex} = $bootindex_hash->{n}; - $bootindex_hash->{n} += 1; - } + if ($bootindex_hash->{n}) { + $d->{bootindex} = $bootindex_hash->{n}; + $bootindex_hash->{n} += 1; + } - my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, "net$i"); - push @$devices, '-netdev', $netdevfull; + my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, "net$i"); + push @$devices, '-netdev', $netdevfull; - my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files, $arch, $machine_type); - push @$devices, '-device', $netdevicefull; + my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files, $arch, $machine_type); + push @$devices, '-device', $netdevicefull; } if ($conf->{ivshmem}) { @@ -4036,6 +3442,9 @@ sub config_to_command { push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path,size=$ivshmem->{size}M"; } + # pci.4 is nested in pci.1 + $bridges->{1} = 1 if $bridges->{4}; + if (!$q35) { # add pci bridges if (min_version($machine_version, 2, 3)) { @@ -4045,19 +3454,41 @@ sub config_to_command { $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/; - for my $k (sort {$b cmp $a} keys %$bridges) { - $pciaddr = print_pci_addr("pci.$k", undef, $arch, $machine_type); - unshift @$devices, '-device', "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" if $k > 0; + } + + for my $k (sort {$b cmp $a} keys %$bridges) { + next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3 + + my $k_name = $k; + if ($k == 2 && $legacy_igd) { + $k_name = "$k-igd"; + } + $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, 
$machine_type); + + my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr"; + if ($q35) { + # add after -readconfig pve-q35.cfg + splice @$devices, 2, 0, '-device', $devstr; + } else { + unshift @$devices, '-device', $devstr if $k > 0; } } + if (!$kvm) { + push @$machineFlags, 'accel=tcg'; + } + + my $machine_type_min = $machine_type; + if ($add_pve_version) { + $machine_type_min =~ s/\+pve\d+$//; + $machine_type_min .= "+pve$required_pve_version"; + } + push @$machineFlags, "type=${machine_type_min}"; + push @$cmd, @$devices; - push @$cmd, '-rtc', join(',', @$rtcFlags) - if scalar(@$rtcFlags); - push @$cmd, '-machine', join(',', @$machineFlags) - if scalar(@$machineFlags); - push @$cmd, '-global', join(',', @$globalFlags) - if scalar(@$globalFlags); + push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); + push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags); + push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags); if (my $vmstate = $conf->{vmstate}) { my $statepath = PVE::Storage::path($storecfg, $vmstate); @@ -4075,6 +3506,24 @@ sub config_to_command { return wantarray ? ($cmd, $vollist, $spice_port) : $cmd; } +sub check_rng_source { + my ($source) = @_; + + # mostly relevant for /dev/hwrng, but doesn't hurt to check others too + die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n" + if ! -e $source; + + my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current'; + if ($source eq '/dev/hwrng' && file_read_firstline($rng_current) eq 'none') { + # Needs to abort, otherwise QEMU crashes on first rng access. + # Note that rng_current cannot be changed to 'none' manually, so + # once the VM is past this point, it is no longer an issue. + die "Cannot start VM with passed-through RNG device: '/dev/hwrng'" + . " exists, but '$rng_current' is set to 'none'. Ensure that" + . 
" a compatible hardware-RNG is attached to the host.\n"; + } +} + sub spice_port { my ($vmid) = @_; @@ -4355,7 +3804,7 @@ sub qemu_objectdel { sub qemu_driveadd { my ($storecfg, $vmid, $device) = @_; - my $drive = print_drive_full($storecfg, $vmid, $device); + my $drive = print_drive_commandline_full($storecfg, $vmid, $device); $drive =~ s/\\/\\\\/g; my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\""); @@ -4436,8 +3885,8 @@ sub qemu_deletescsihw { my $devices_list = vm_devices_list($vmid); foreach my $opt (keys %{$devices_list}) { - if (PVE::QemuServer::is_valid_drivename($opt)) { - my $drive = PVE::QemuServer::parse_drive($opt, $conf->{$opt}); + if (is_valid_drivename($opt)) { + my $drive = parse_drive($opt, $conf->{$opt}); if($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) { return 1; } @@ -4488,6 +3937,14 @@ sub qemu_netdevadd { my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1); my %options = split(/[=,]/, $netdev); + if (defined(my $vhost = $options{vhost})) { + $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($vhost)); + } + + if (defined(my $queues = $options{queues})) { + $options{queues} = $queues + 0; + } + mon_cmd($vmid, "netdev_add", %options); return 1; } @@ -4550,7 +4007,7 @@ sub qemu_cpu_hotplug { my $retry = 0; my $currentrunningvcpus = undef; while (1) { - $currentrunningvcpus = mon_cmd($vmid, "query-cpus"); + $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast"); last if scalar(@{$currentrunningvcpus}) == $i-1; raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5; $retry++; @@ -4567,7 +4024,7 @@ sub qemu_cpu_hotplug { return; } - my $currentrunningvcpus = mon_cmd($vmid, "query-cpus"); + my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast"); die "vcpus in running vm does not match its configuration\n" if scalar(@{$currentrunningvcpus}) != $currentvcpus; @@ -4580,7 +4037,7 @@ sub qemu_cpu_hotplug { my $retry = 0; my 
$currentrunningvcpus = undef; while (1) { - $currentrunningvcpus = mon_cmd($vmid, "query-cpus"); + $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast"); last if scalar(@{$currentrunningvcpus}) == $i; raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10; sleep 1; @@ -4672,6 +4129,9 @@ sub qemu_block_resize { return if !$running; + my $padding = (1024 - $size % 1024) % 1024; + $size = $size + $padding; + mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size)); } @@ -4697,7 +4157,7 @@ sub qemu_volume_snapshot_delete { $running = undef; my $conf = PVE::QemuConfig->load_config($vmid); - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; $running = 1 if $drive->{file} eq $volid; }); @@ -4735,15 +4195,68 @@ sub set_migration_caps { mon_cmd($vmid, "migrate-set-capabilities", capabilities => $cap_ref); } -my $fast_plug_option = { - 'lock' => 1, - 'name' => 1, - 'onboot' => 1, - 'shares' => 1, - 'startup' => 1, - 'description' => 1, - 'protection' => 1, - 'vmstatestorage' => 1, +sub foreach_volid { + my ($conf, $func, @param) = @_; + + my $volhash = {}; + + my $test_volid = sub { + my ($key, $drive, $snapname) = @_; + + my $volid = $drive->{file}; + return if !$volid; + + $volhash->{$volid}->{cdrom} //= 1; + $volhash->{$volid}->{cdrom} = 0 if !drive_is_cdrom($drive); + + my $replicate = $drive->{replicate} // 1; + $volhash->{$volid}->{replicate} //= 0; + $volhash->{$volid}->{replicate} = 1 if $replicate; + + $volhash->{$volid}->{shared} //= 0; + $volhash->{$volid}->{shared} = 1 if $drive->{shared}; + + $volhash->{$volid}->{referenced_in_config} //= 0; + $volhash->{$volid}->{referenced_in_config} = 1 if !defined($snapname); + + $volhash->{$volid}->{referenced_in_snapshot}->{$snapname} = 1 + if defined($snapname); + + my $size = $drive->{size}; + $volhash->{$volid}->{size} //= $size if $size; + + $volhash->{$volid}->{is_vmstate} //= 0; + $volhash->{$volid}->{is_vmstate} = 1 if $key eq 
'vmstate'; + + $volhash->{$volid}->{is_unused} //= 0; + $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/; + }; + + my $include_opts = { + extra_keys => ['vmstate'], + include_unused => 1, + }; + + PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid); + foreach my $snapname (keys %{$conf->{snapshots}}) { + my $snap = $conf->{snapshots}->{$snapname}; + PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname); + } + + foreach my $volid (keys %$volhash) { + &$func($volid, $volhash->{$volid}, @param); + } +} + +my $fast_plug_option = { + 'lock' => 1, + 'name' => 1, + 'onboot' => 1, + 'shares' => 1, + 'startup' => 1, + 'description' => 1, + 'protection' => 1, + 'vmstatestorage' => 1, 'hookscript' => 1, 'tags' => 1, }; @@ -4911,7 +4424,7 @@ sub vmconfig_hotplug_pending { &$apply_pending_cloudinit($opt, $value); } vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk}, - $vmid, $opt, $value, 1, $arch, $machine_type); + $vmid, $opt, $value, $arch, $machine_type); } elsif ($opt =~ m/^memory$/) { #dimms die "skip\n" if !$hotplug_features->{memory}; $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value); @@ -4946,7 +4459,7 @@ sub try_deallocate_drive { # check if the disk is really unused die "unable to delete '$volid' - volume is still in use (snapshot?)\n" - if is_volume_in_use($storecfg, $conf, $key, $volid); + if PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid); PVE::Storage::vdisk_free($storecfg, $volid); return 1; } else { @@ -4977,69 +4490,54 @@ sub vmconfig_delete_or_detach_drive { sub vmconfig_apply_pending { - my ($vmid, $conf, $storecfg) = @_; + my ($vmid, $conf, $storecfg, $errors) = @_; + + my $add_apply_error = sub { + my ($opt, $msg) = @_; + my $err_msg = "unable to apply pending change $opt : $msg"; + $errors->{$opt} = $err_msg; + warn $err_msg; + }; # cold plug my $pending_delete_hash = 
PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete}); foreach my $opt (sort keys %$pending_delete_hash) { - die "internal error" if $opt =~ m/^unused/; my $force = $pending_delete_hash->{$opt}->{force}; - $conf = PVE::QemuConfig->load_config($vmid); # update/reload - if (!defined($conf->{$opt})) { - PVE::QemuConfig->remove_from_pending_delete($conf, $opt); - PVE::QemuConfig->write_config($vmid, $conf); - } elsif (is_valid_drivename($opt)) { - vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); - PVE::QemuConfig->remove_from_pending_delete($conf, $opt); - delete $conf->{$opt}; - PVE::QemuConfig->write_config($vmid, $conf); + eval { + if ($opt =~ m/^unused/) { + die "internal error"; + } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) { + vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); + } + }; + if (my $err = $@) { + $add_apply_error->($opt, $err); } else { PVE::QemuConfig->remove_from_pending_delete($conf, $opt); delete $conf->{$opt}; - PVE::QemuConfig->write_config($vmid, $conf); } } - $conf = PVE::QemuConfig->load_config($vmid); # update/reload + PVE::QemuConfig->cleanup_pending($conf); foreach my $opt (keys %{$conf->{pending}}) { # add/change - $conf = PVE::QemuConfig->load_config($vmid); # update/reload - - if (defined($conf->{$opt}) && ($conf->{$opt} eq $conf->{pending}->{$opt})) { - # skip if nothing changed - } elsif (is_valid_drivename($opt)) { - vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt})) - if defined($conf->{$opt}); - $conf->{$opt} = $conf->{pending}->{$opt}; + next if $opt eq 'delete'; # just to be sure + eval { + if (defined($conf->{$opt}) && is_valid_drivename($opt)) { + vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt})) + } + }; + if (my $err = $@) { + $add_apply_error->($opt, $err); } else { - $conf->{$opt} = $conf->{pending}->{$opt}; + $conf->{$opt} = delete $conf->{pending}->{$opt}; } - - delete 
$conf->{pending}->{$opt}; - PVE::QemuConfig->write_config($vmid, $conf); } -} - -my $safe_num_ne = sub { - my ($a, $b) = @_; - return 0 if !defined($a) && !defined($b); - return 1 if !defined($a); - return 1 if !defined($b); - - return $a != $b; -}; - -my $safe_string_ne = sub { - my ($a, $b) = @_; - - return 0 if !defined($a) && !defined($b); - return 1 if !defined($a); - return 1 if !defined($b); - - return $a ne $b; -}; + # write all changes at once to avoid unnecessary i/o + PVE::QemuConfig->write_config($vmid, $conf); +} sub vmconfig_update_net { my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_; @@ -5049,9 +4547,9 @@ sub vmconfig_update_net { if ($conf->{$opt}) { my $oldnet = parse_net($conf->{$opt}); - if (&$safe_string_ne($oldnet->{model}, $newnet->{model}) || - &$safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) || - &$safe_num_ne($oldnet->{queues}, $newnet->{queues}) || + if (safe_string_ne($oldnet->{model}, $newnet->{model}) || + safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) || + safe_num_ne($oldnet->{queues}, $newnet->{queues}) || !($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change # for non online change, we try to hot-unplug @@ -5062,19 +4560,24 @@ sub vmconfig_update_net { die "internal error" if $opt !~ m/net(\d+)/; my $iface = "tap${vmid}i$1"; - if (&$safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) || - &$safe_num_ne($oldnet->{tag}, $newnet->{tag}) || - &$safe_string_ne($oldnet->{trunks}, $newnet->{trunks}) || - &$safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) { + if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) || + safe_num_ne($oldnet->{tag}, $newnet->{tag}) || + safe_string_ne($oldnet->{trunks}, $newnet->{trunks}) || + safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) { PVE::Network::tap_unplug($iface); - PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate}); - } elsif 
(&$safe_num_ne($oldnet->{rate}, $newnet->{rate})) { + + if ($have_sdn) { + PVE::Network::SDN::Zones::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate}); + } else { + PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate}); + } + } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) { # Rate can be applied on its own but any change above needs to # include the rate in tap_plug since OVS resets everything. PVE::Network::tap_rate_limit($iface, $newnet->{rate}); } - if (&$safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) { + if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) { qemu_set_link_status($vmid, $opt, !$newnet->{link_down}); } @@ -5090,9 +4593,7 @@ sub vmconfig_update_net { } sub vmconfig_update_disk { - my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $force, $arch, $machine_type) = @_; - - # fixme: do we need force? + my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_; my $drive = parse_drive($opt, $value); @@ -5118,32 +4619,33 @@ sub vmconfig_update_disk { # update existing disk # skip non hotpluggable value - if (&$safe_string_ne($drive->{discard}, $old_drive->{discard}) || - &$safe_string_ne($drive->{iothread}, $old_drive->{iothread}) || - &$safe_string_ne($drive->{queues}, $old_drive->{queues}) || - &$safe_string_ne($drive->{cache}, $old_drive->{cache})) { + if (safe_string_ne($drive->{discard}, $old_drive->{discard}) || + safe_string_ne($drive->{iothread}, $old_drive->{iothread}) || + safe_string_ne($drive->{queues}, $old_drive->{queues}) || + safe_string_ne($drive->{cache}, $old_drive->{cache}) || + safe_string_ne($drive->{ssd}, $old_drive->{ssd})) { die "skip\n"; } # apply throttle - if (&$safe_num_ne($drive->{mbps}, $old_drive->{mbps}) || - &$safe_num_ne($drive->{mbps_rd}, $old_drive->{mbps_rd}) || - &$safe_num_ne($drive->{mbps_wr}, $old_drive->{mbps_wr}) || - 
&$safe_num_ne($drive->{iops}, $old_drive->{iops}) || - &$safe_num_ne($drive->{iops_rd}, $old_drive->{iops_rd}) || - &$safe_num_ne($drive->{iops_wr}, $old_drive->{iops_wr}) || - &$safe_num_ne($drive->{mbps_max}, $old_drive->{mbps_max}) || - &$safe_num_ne($drive->{mbps_rd_max}, $old_drive->{mbps_rd_max}) || - &$safe_num_ne($drive->{mbps_wr_max}, $old_drive->{mbps_wr_max}) || - &$safe_num_ne($drive->{iops_max}, $old_drive->{iops_max}) || - &$safe_num_ne($drive->{iops_rd_max}, $old_drive->{iops_rd_max}) || - &$safe_num_ne($drive->{iops_wr_max}, $old_drive->{iops_wr_max}) || - &$safe_num_ne($drive->{bps_max_length}, $old_drive->{bps_max_length}) || - &$safe_num_ne($drive->{bps_rd_max_length}, $old_drive->{bps_rd_max_length}) || - &$safe_num_ne($drive->{bps_wr_max_length}, $old_drive->{bps_wr_max_length}) || - &$safe_num_ne($drive->{iops_max_length}, $old_drive->{iops_max_length}) || - &$safe_num_ne($drive->{iops_rd_max_length}, $old_drive->{iops_rd_max_length}) || - &$safe_num_ne($drive->{iops_wr_max_length}, $old_drive->{iops_wr_max_length})) { + if (safe_num_ne($drive->{mbps}, $old_drive->{mbps}) || + safe_num_ne($drive->{mbps_rd}, $old_drive->{mbps_rd}) || + safe_num_ne($drive->{mbps_wr}, $old_drive->{mbps_wr}) || + safe_num_ne($drive->{iops}, $old_drive->{iops}) || + safe_num_ne($drive->{iops_rd}, $old_drive->{iops_rd}) || + safe_num_ne($drive->{iops_wr}, $old_drive->{iops_wr}) || + safe_num_ne($drive->{mbps_max}, $old_drive->{mbps_max}) || + safe_num_ne($drive->{mbps_rd_max}, $old_drive->{mbps_rd_max}) || + safe_num_ne($drive->{mbps_wr_max}, $old_drive->{mbps_wr_max}) || + safe_num_ne($drive->{iops_max}, $old_drive->{iops_max}) || + safe_num_ne($drive->{iops_rd_max}, $old_drive->{iops_rd_max}) || + safe_num_ne($drive->{iops_wr_max}, $old_drive->{iops_wr_max}) || + safe_num_ne($drive->{bps_max_length}, $old_drive->{bps_max_length}) || + safe_num_ne($drive->{bps_rd_max_length}, $old_drive->{bps_rd_max_length}) || + safe_num_ne($drive->{bps_wr_max_length}, 
$old_drive->{bps_wr_max_length}) || + safe_num_ne($drive->{iops_max_length}, $old_drive->{iops_max_length}) || + safe_num_ne($drive->{iops_rd_max_length}, $old_drive->{iops_rd_max_length}) || + safe_num_ne($drive->{iops_wr_max_length}, $old_drive->{iops_wr_max_length})) { qemu_block_set_io_throttle($vmid,"drive-$opt", ($drive->{mbps} || 0)*1024*1024, @@ -5173,14 +4675,20 @@ sub vmconfig_update_disk { } else { # cdrom if ($drive->{file} eq 'none') { - mon_cmd($vmid, "eject",force => JSON::true,device => "drive-$opt"); + mon_cmd($vmid, "eject", force => JSON::true, id => "$opt"); if (drive_is_cloudinit($old_drive)) { vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive); } } else { my $path = get_iso_path($storecfg, $vmid, $drive->{file}); - mon_cmd($vmid, "eject", force => JSON::true,device => "drive-$opt"); # force eject if locked - mon_cmd($vmid, "change", device => "drive-$opt",target => "$path") if $path; + + # force eject if locked + mon_cmd($vmid, "eject", force => JSON::true, id => "$opt"); + + if ($path) { + mon_cmd($vmid, "blockdev-change-medium", + id => "$opt", filename => "$path"); + } } return 1; @@ -5194,342 +4702,434 @@ sub vmconfig_update_disk { vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type); } -sub vm_start { - my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, - $forcemachine, $spice_ticket, $migration_network, $migration_type, $targetstorage) = @_; +# called in locked context by incoming migration +sub vm_migrate_get_nbd_disks { + my ($storecfg, $conf, $replicated_volumes) = @_; - PVE::QemuConfig->lock_config($vmid, sub { - my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom); + my $local_volumes = {}; + PVE::QemuConfig->foreach_volume($conf, sub { + my ($ds, $drive) = @_; - die "you can't start a vm if it's a template\n" if PVE::QemuConfig->is_template($conf); + return if drive_is_cdrom($drive); - my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended'); + my $volid = 
$drive->{file}; - PVE::QemuConfig->check_lock($conf) - if !($skiplock || $is_suspended); + return if !$volid; - die "VM $vmid already running\n" if check_running($vmid, undef, $migratedfrom); + my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid); - # clean up leftover reboot request files - eval { clear_reboot_request($vmid); }; - warn $@ if $@; + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + return if $scfg->{shared}; - if (!$statefile && scalar(keys %{$conf->{pending}})) { - vmconfig_apply_pending($vmid, $conf, $storecfg); - $conf = PVE::QemuConfig->load_config($vmid); # update/reload - } + # replicated disks re-use existing state via bitmap + my $use_existing = $replicated_volumes->{$volid} ? 1 : 0; + $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing]; + }); + return $local_volumes; +} - PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid); +# called in locked context by incoming migration +sub vm_migrate_alloc_nbd_disks { + my ($storecfg, $vmid, $source_volumes, $storagemap) = @_; - my $defaults = load_defaults(); + my $format = undef; - # set environment variable useful inside network script - $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom; + my $nbd = {}; + foreach my $opt (sort keys %$source_volumes) { + my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}}; - my $local_volumes = {}; + if ($use_existing) { + $nbd->{$opt}->{drivestr} = print_drive($drive); + $nbd->{$opt}->{volid} = $volid; + $nbd->{$opt}->{replicated} = 1; + next; + } - if ($targetstorage) { - foreach_drive($conf, sub { - my ($ds, $drive) = @_; + # If a remote storage is specified and the format of the original + # volume is not available there, fall back to the default format. + # Otherwise use the same format as the original. 
+ if (!$storagemap->{identity}) { + $storeid = map_storage($storagemap, $storeid); + my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + my $fileFormat = qemu_img_format($scfg, $volname); + $format = (grep {$fileFormat eq $_} @{$validFormats}) ? $fileFormat : $defFormat; + } else { + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + $format = qemu_img_format($scfg, $volname); + } - return if drive_is_cdrom($drive); + my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, ($drive->{size}/1024)); + my $newdrive = $drive; + $newdrive->{format} = $format; + $newdrive->{file} = $newvolid; + my $drivestr = print_drive($newdrive); + $nbd->{$opt}->{drivestr} = $drivestr; + $nbd->{$opt}->{volid} = $newvolid; + } - my $volid = $drive->{file}; + return $nbd; +} - return if !$volid; +# see vm_start_nolock for parameters, additionally: +# migrate_opts: +# storagemap = parsed storage map for allocating NBD disks +sub vm_start { + my ($storecfg, $vmid, $params, $migrate_opts) = @_; - my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid); + return PVE::QemuConfig->lock_config($vmid, sub { + my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom}); - my $scfg = PVE::Storage::storage_config($storecfg, $storeid); - return if $scfg->{shared}; - $local_volumes->{$ds} = [$volid, $storeid, $volname]; - }); + die "you can't start a vm if it's a template\n" if PVE::QemuConfig->is_template($conf); - my $format = undef; + $params->{resume} = PVE::QemuConfig->has_lock($conf, 'suspended'); - foreach my $opt (sort keys %$local_volumes) { + PVE::QemuConfig->check_lock($conf) + if !($params->{skiplock} || $params->{resume}); - my ($volid, $storeid, $volname) = @{$local_volumes->{$opt}}; - my $drive = parse_drive($opt, $conf->{$opt}); + die "VM $vmid already running\n" if check_running($vmid, undef, 
$migrate_opts->{migratedfrom}); - #if remote storage is specified, use default format - if ($targetstorage && $targetstorage ne "1") { - $storeid = $targetstorage; - my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); - $format = $defFormat; - } else { - #else we use same format than original - my $scfg = PVE::Storage::storage_config($storecfg, $storeid); - $format = qemu_img_format($scfg, $volid); - } + if (my $storagemap = $migrate_opts->{storagemap}) { + my $replicated = $migrate_opts->{replicated_volumes}; + my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated); + $migrate_opts->{nbd} = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap); - my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, ($drive->{size}/1024)); - my $newdrive = $drive; - $newdrive->{format} = $format; - $newdrive->{file} = $newvolid; - my $drivestr = print_drive($newdrive); - $local_volumes->{$opt} = $drivestr; - #pass drive to conf for command line - $conf->{$opt} = $drivestr; + foreach my $opt (keys %{$migrate_opts->{nbd}}) { + $conf->{$opt} = $migrate_opts->{nbd}->{$opt}->{drivestr}; } } - PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1); + return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts); + }); +} - if ($is_suspended) { - # enforce machine type on suspended vm to ensure HW compatibility - $forcemachine = $conf->{runningmachine}; - print "Resuming suspended VM\n"; - } - my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine); +# params: +# statefile => 'tcp', 'unix' for migration or path/volid for RAM state +# skiplock => 0/1, skip checking for config lock +# forcemachine => to force Qemu machine (rollback/migration) +# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options +# timeout => in seconds +# paused => start VM in paused state (backup) +# resume => resume from hibernation +# 
migrate_opts: +# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks) +# migratedfrom => source node +# spice_ticket => used for spice migration, passed via tunnel/stdin +# network => CIDR of migration network +# type => secure/insecure - tunnel over encrypted connection or plain-text +# nbd_proto_version => int, 0 for TCP, 1 for UNIX +# replicated_volumes = which volids should be re-used with bitmaps for nbd migration +sub vm_start_nolock { + my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_; + + my $statefile = $params->{statefile}; + my $resume = $params->{resume}; + + my $migratedfrom = $migrate_opts->{migratedfrom}; + my $migration_type = $migrate_opts->{type}; - my $migration_ip; - my $get_migration_ip = sub { - my ($cidr, $nodename) = @_; + my $res = {}; - return $migration_ip if defined($migration_ip); + # clean up leftover reboot request files + eval { clear_reboot_request($vmid); }; + warn $@ if $@; - if (!defined($cidr)) { - my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg'); - $cidr = $dc_conf->{migration}->{network}; - } + if (!$statefile && scalar(keys %{$conf->{pending}})) { + vmconfig_apply_pending($vmid, $conf, $storecfg); + $conf = PVE::QemuConfig->load_config($vmid); # update/reload + } - if (defined($cidr)) { - my $ips = PVE::Network::get_local_ip_from_cidr($cidr); + PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid); - die "could not get IP: no address configured on local " . - "node for network '$cidr'\n" if scalar(@$ips) == 0; + my $defaults = load_defaults(); - die "could not get IP: multiple addresses configured on local " . 
- "node for network '$cidr'\n" if scalar(@$ips) > 1; + # set environment variable useful inside network script + $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom; - $migration_ip = @$ips[0]; - } + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1); - $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1) - if !defined($migration_ip); + my $forcemachine = $params->{forcemachine}; + my $forcecpu = $params->{forcecpu}; + if ($resume) { + # enforce machine and CPU type on suspended vm to ensure HW compatibility + $forcemachine = $conf->{runningmachine}; + $forcecpu = $conf->{runningcpu}; + print "Resuming suspended VM\n"; + } - return $migration_ip; - }; + my ($cmd, $vollist, $spice_port) = + config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu); - my $migrate_uri; - if ($statefile) { - if ($statefile eq 'tcp') { - my $localip = "localhost"; - my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg'); - my $nodename = nodename(); + my $migration_ip; + my $get_migration_ip = sub { + my ($nodename) = @_; - if (!defined($migration_type)) { - if (defined($datacenterconf->{migration}->{type})) { - $migration_type = $datacenterconf->{migration}->{type}; - } else { - $migration_type = 'secure'; - } - } + return $migration_ip if defined($migration_ip); - if ($migration_type eq 'insecure') { - $localip = $get_migration_ip->($migration_network, $nodename); - $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip); - } + my $cidr = $migrate_opts->{network}; - my $pfamily = PVE::Tools::get_host_address_family($nodename); - my $migrate_port = PVE::Tools::next_migrate_port($pfamily); - $migrate_uri = "tcp:${localip}:${migrate_port}"; - push @$cmd, '-incoming', $migrate_uri; - push @$cmd, '-S'; + if (!defined($cidr)) { + my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg'); + $cidr = $dc_conf->{migration}->{network}; + } - } elsif ($statefile eq 'unix') { - # should be default for secure migrations as a ssh TCP 
forward - # tunnel is not deterministic reliable ready and fails regurarly - # to set up in time, so use UNIX socket forwards - my $socket_addr = "/run/qemu-server/$vmid.migrate"; - unlink $socket_addr; + if (defined($cidr)) { + my $ips = PVE::Network::get_local_ip_from_cidr($cidr); - $migrate_uri = "unix:$socket_addr"; + die "could not get IP: no address configured on local " . + "node for network '$cidr'\n" if scalar(@$ips) == 0; - push @$cmd, '-incoming', $migrate_uri; - push @$cmd, '-S'; + die "could not get IP: multiple addresses configured on local " . + "node for network '$cidr'\n" if scalar(@$ips) > 1; - } elsif (-e $statefile) { - push @$cmd, '-loadstate', $statefile; - } else { - my $statepath = PVE::Storage::path($storecfg, $statefile); - push @$vollist, $statefile; - push @$cmd, '-loadstate', $statepath; - } - } elsif ($paused) { - push @$cmd, '-S'; + $migration_ip = @$ips[0]; } - # host pci devices - for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) { - my $d = parse_hostpci($conf->{"hostpci$i"}); - next if !$d; - my $pcidevices = $d->{pciid}; - foreach my $pcidevice (@$pcidevices) { - my $pciid = $pcidevice->{id}; + $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1) + if !defined($migration_ip); - my $info = PVE::SysFSTools::pci_device_info("$pciid"); - die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support(); - die "no pci device info for device '$pciid'\n" if !$info; + return $migration_ip; + }; - if ($d->{mdev}) { - my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i); - PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $d->{mdev}); + my $migrate_uri; + if ($statefile) { + if ($statefile eq 'tcp') { + my $localip = "localhost"; + my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg'); + my $nodename = nodename(); + + if (!defined($migration_type)) { + if (defined($datacenterconf->{migration}->{type})) { + $migration_type = $datacenterconf->{migration}->{type}; } else { - die "can't unbind/bind pci group to 
vfio '$pciid'\n" - if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid); - die "can't reset pci device '$pciid'\n" - if $info->{has_fl_reset} and !PVE::SysFSTools::pci_dev_reset($info); + $migration_type = 'secure'; } - } - } + } - PVE::Storage::activate_volumes($storecfg, $vollist); + if ($migration_type eq 'insecure') { + $localip = $get_migration_ip->($nodename); + $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip); + } - eval { - run_command(['/bin/systemctl', 'stop', "$vmid.scope"], - outfunc => sub {}, errfunc => sub {}); - }; - # Issues with the above 'stop' not being fully completed are extremely rare, a very low - # timeout should be more than enough here... - PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5); + my $pfamily = PVE::Tools::get_host_address_family($nodename); + my $migrate_port = PVE::Tools::next_migrate_port($pfamily); + $migrate_uri = "tcp:${localip}:${migrate_port}"; + push @$cmd, '-incoming', $migrate_uri; + push @$cmd, '-S'; + + } elsif ($statefile eq 'unix') { + # should be default for secure migrations as a ssh TCP forward + # tunnel is not deterministic reliable ready and fails regularly + # to set up in time, so use UNIX socket forwards + my $socket_addr = "/run/qemu-server/$vmid.migrate"; + unlink $socket_addr; - my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits} - : $defaults->{cpuunits}; + $migrate_uri = "unix:$socket_addr"; - my $start_timeout = ($conf->{hugepages} || $is_suspended) ? 300 : 30; - my %run_params = ( - timeout => $statefile ? 
undef : $start_timeout, - umask => 0077, - noerr => 1, - ); + push @$cmd, '-incoming', $migrate_uri; + push @$cmd, '-S'; - # when migrating, prefix QEMU output so other side can pick up any - # errors that might occur and show the user - if ($migratedfrom) { - $run_params{quiet} = 1; - $run_params{logfunc} = sub { print "QEMU: $_[0]\n" }; + } elsif (-e $statefile) { + push @$cmd, '-loadstate', $statefile; + } else { + my $statepath = PVE::Storage::path($storecfg, $statefile); + push @$vollist, $statefile; + push @$cmd, '-loadstate', $statepath; } + } elsif ($params->{paused}) { + push @$cmd, '-S'; + } - my %properties = ( - Slice => 'qemu.slice', - KillMode => 'none', - CPUShares => $cpuunits - ); + # host pci devices + for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) { + my $d = parse_hostpci($conf->{"hostpci$i"}); + next if !$d; + my $pcidevices = $d->{pciid}; + foreach my $pcidevice (@$pcidevices) { + my $pciid = $pcidevice->{id}; + + my $info = PVE::SysFSTools::pci_device_info("$pciid"); + die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support(); + die "no pci device info for device '$pciid'\n" if !$info; + + if ($d->{mdev}) { + my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i); + PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $d->{mdev}); + } else { + die "can't unbind/bind pci group to vfio '$pciid'\n" + if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid); + die "can't reset pci device '$pciid'\n" + if $info->{has_fl_reset} and !PVE::SysFSTools::pci_dev_reset($info); + } + } + } - if (my $cpulimit = $conf->{cpulimit}) { - $properties{CPUQuota} = int($cpulimit * 100); - } - $properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick + PVE::Storage::activate_volumes($storecfg, $vollist); - my $run_qemu = sub { - PVE::Tools::run_fork sub { - PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties); + eval { + run_command(['/bin/systemctl', 'stop', "$vmid.scope"], + 
outfunc => sub {}, errfunc => sub {}); + }; + # Issues with the above 'stop' not being fully completed are extremely rare, a very low + # timeout should be more than enough here... + PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5); + + my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits} + : $defaults->{cpuunits}; + + my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume); + my %run_params = ( + timeout => $statefile ? undef : $start_timeout, + umask => 0077, + noerr => 1, + ); - my $exitcode = run_command($cmd, %run_params); - die "QEMU exited with code $exitcode\n" if $exitcode; - }; - }; + # when migrating, prefix QEMU output so other side can pick up any + # errors that might occur and show the user + if ($migratedfrom) { + $run_params{quiet} = 1; + $run_params{logfunc} = sub { print "QEMU: $_[0]\n" }; + } - if ($conf->{hugepages}) { + my %properties = ( + Slice => 'qemu.slice', + KillMode => 'none', + CPUShares => $cpuunits + ); - my $code = sub { - my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf); - my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology(); + if (my $cpulimit = $conf->{cpulimit}) { + $properties{CPUQuota} = int($cpulimit * 100); + } + $properties{timeout} = 10 if $statefile; # setting up the scope should be quick - PVE::QemuServer::Memory::hugepages_mount(); - PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology); + my $run_qemu = sub { + PVE::Tools::run_fork sub { + PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties); - eval { $run_qemu->() }; - if (my $err = $@) { - PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology); - die $err; - } + my $exitcode = run_command($cmd, %run_params); + die "QEMU exited with code $exitcode\n" if $exitcode; + }; + }; - PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology); - }; - eval { 
PVE::QemuServer::Memory::hugepages_update_locked($code); }; + if ($conf->{hugepages}) { + + my $code = sub { + my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf); + my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology(); + + PVE::QemuServer::Memory::hugepages_mount(); + PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology); - } else { eval { $run_qemu->() }; - } + if (my $err = $@) { + PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology); + die $err; + } - if (my $err = $@) { - # deactivate volumes if start fails - eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); }; - die "start failed: $err"; - } + PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology); + }; + eval { PVE::QemuServer::Memory::hugepages_update_locked($code); }; - print "migration listens on $migrate_uri\n" if $migrate_uri; + } else { + eval { $run_qemu->() }; + } - if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') { - eval { mon_cmd($vmid, "cont"); }; - warn $@ if $@; - } + if (my $err = $@) { + # deactivate volumes if start fails + eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); }; + die "start failed: $err"; + } + + print "migration listens on $migrate_uri\n" if $migrate_uri; + $res->{migrate_uri} = $migrate_uri; + + if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') { + eval { mon_cmd($vmid, "cont"); }; + warn $@ if $@; + } + + #start nbd server for storage migration + if (my $nbd = $migrate_opts->{nbd}) { + my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0; - #start nbd server for storage migration - if ($targetstorage) { + my $migrate_storage_uri; + # nbd_protocol_version > 0 for unix socket support + if ($nbd_protocol_version > 0 && $migration_type eq 'secure') { + my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate"; + mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => 
$socket_path } } ); + $migrate_storage_uri = "nbd:unix:$socket_path"; + } else { my $nodename = nodename(); - my $localip = $get_migration_ip->($migration_network, $nodename); + my $localip = $get_migration_ip->($nodename); my $pfamily = PVE::Tools::get_host_address_family($nodename); my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily); mon_cmd($vmid, "nbd-server-start", addr => { type => 'inet', data => { host => "${localip}", port => "${storage_migrate_port}" } } ); - $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip); + $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}"; + } - foreach my $opt (sort keys %$local_volumes) { - my $volid = $local_volumes->{$opt}; - mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true ); - my $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}:exportname=drive-$opt"; - print "storage migration listens on $migrate_storage_uri volume:$volid\n"; - } + $res->{migrate_storage_uri} = $migrate_storage_uri; + + foreach my $opt (sort keys %$nbd) { + my $drivestr = $nbd->{$opt}->{drivestr}; + my $volid = $nbd->{$opt}->{volid}; + mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true ); + my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt"; + print "storage migration listens on $nbd_uri volume:$drivestr\n"; + print "re-using replicated volume: $opt - $volid\n" + if $nbd->{$opt}->{replicated}; + + $res->{drives}->{$opt} = $nbd->{$opt}; + $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri; } + } - if ($migratedfrom) { - eval { - set_migration_caps($vmid); - }; - warn $@ if $@; + if ($migratedfrom) { + eval { + set_migration_caps($vmid); + }; + warn $@ if $@; - if ($spice_port) { - print "spice listens on port $spice_port\n"; - if ($spice_ticket) { - mon_cmd($vmid, "set_password", protocol => 'spice', password => $spice_ticket); - mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30"); - } + if ($spice_port) { + print "spice listens on 
port $spice_port\n"; + $res->{spice_port} = $spice_port; + if ($migrate_opts->{spice_ticket}) { + mon_cmd($vmid, "set_password", protocol => 'spice', password => $migrate_opts->{spice_ticket}); + mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30"); } + } - } else { - mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024) - if !$statefile && $conf->{balloon}; + } else { + mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024) + if !$statefile && $conf->{balloon}; - foreach my $opt (keys %$conf) { - next if $opt !~ m/^net\d+$/; - my $nicconf = parse_net($conf->{$opt}); - qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down}; - } + foreach my $opt (keys %$conf) { + next if $opt !~ m/^net\d+$/; + my $nicconf = parse_net($conf->{$opt}); + qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down}; } + } - mon_cmd($vmid, 'qom-set', - path => "machine/peripheral/balloon0", - property => "guest-stats-polling-interval", - value => 2) if (!defined($conf->{balloon}) || $conf->{balloon}); + mon_cmd($vmid, 'qom-set', + path => "machine/peripheral/balloon0", + property => "guest-stats-polling-interval", + value => 2) if (!defined($conf->{balloon}) || $conf->{balloon}); - if ($is_suspended) { - print "Resumed VM, removing state\n"; - if (my $vmstate = $conf->{vmstate}) { - PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); - PVE::Storage::vdisk_free($storecfg, $vmstate); - } - delete $conf->@{qw(lock vmstate runningmachine)}; - PVE::QemuConfig->write_config($vmid, $conf); + if ($resume) { + print "Resumed VM, removing state\n"; + if (my $vmstate = $conf->{vmstate}) { + PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); + PVE::Storage::vdisk_free($storecfg, $vmstate); } + delete $conf->@{qw(lock vmstate runningmachine runningcpu)}; + PVE::QemuConfig->write_config($vmid, $conf); + } - PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start'); - }); + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start'); + + 
return $res; } sub vm_commandline { @@ -5537,13 +5137,15 @@ sub vm_commandline { my $conf = PVE::QemuConfig->load_config($vmid); my $forcemachine; + my $forcecpu; if ($snapname) { my $snapshot = $conf->{snapshots}->{$snapname}; die "snapshot '$snapname' does not exist\n" if !defined($snapshot); - # check for a 'runningmachine' in snapshot - $forcemachine = $snapshot->{runningmachine} if $snapshot->{runningmachine}; + # check for machine or CPU overrides in snapshot + $forcemachine = $snapshot->{runningmachine}; + $forcecpu = $snapshot->{runningcpu}; $snapshot->{digest} = $conf->{digest}; # keep file digest for API @@ -5552,7 +5154,8 @@ sub vm_commandline { my $defaults = load_defaults(); - my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine); + my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, + $forcemachine, $forcecpu); return PVE::Tools::cmd2string($cmd); } @@ -5680,7 +5283,11 @@ sub _do_vm_stop { return; } } else { - if ($force) { + if (!check_running($vmid, $nocheck)) { + warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n"; + return; + } + if ($force) { warn "VM quit/powerdown failed - terminating now with SIGTERM\n"; kill 15, $pid; } else { @@ -5826,7 +5433,7 @@ sub vm_suspend { mon_cmd($vmid, "savevm-end"); PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); PVE::Storage::vdisk_free($storecfg, $vmstate); - delete $conf->@{qw(vmstate runningmachine)}; + delete $conf->@{qw(vmstate runningmachine runningcpu)}; PVE::QemuConfig->write_config($vmid, $conf); }; warn $@ if $@; @@ -5925,31 +5532,15 @@ sub tar_restore_cleanup { } } -sub restore_archive { +sub restore_file_archive { my ($archive, $vmid, $user, $opts) = @_; - my $format = $opts->{format}; - my $comp; - - if ($archive =~ m/\.tgz$/ || $archive =~ m/\.tar\.gz$/) { - $format = 'tar' if !$format; - $comp = 'gzip'; - } elsif ($archive =~ m/\.tar$/) { - $format = 'tar' if !$format; - } elsif ($archive =~ m/.tar.lzo$/) { - $format = 'tar' if 
!$format; - $comp = 'lzop'; - } elsif ($archive =~ m/\.vma$/) { - $format = 'vma' if !$format; - } elsif ($archive =~ m/\.vma\.gz$/) { - $format = 'vma' if !$format; - $comp = 'gzip'; - } elsif ($archive =~ m/\.vma\.lzo$/) { - $format = 'vma' if !$format; - $comp = 'lzop'; - } else { - $format = 'vma' if !$format; # default - } + return restore_vma_archive($archive, $vmid, $user, $opts) + if $archive eq '-'; + + my $info = PVE::Storage::archive_info($archive); + my $format = $opts->{format} // $info->{format}; + my $comp = $info->{compression}; # try to detect archive format if ($format eq 'tar') { @@ -5959,7 +5550,145 @@ sub restore_archive { } } -sub restore_update_config_line { +# hepler to remove disks that will not be used after restore +my $restore_cleanup_oldconf = sub { + my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_; + + PVE::QemuConfig->foreach_volume($oldconf, sub { + my ($ds, $drive) = @_; + + return if drive_is_cdrom($drive, 1); + + my $volid = $drive->{file}; + return if !$volid || $volid =~ m|^/|; + + my ($path, $owner) = PVE::Storage::path($storecfg, $volid); + return if !$path || !$owner || ($owner != $vmid); + + # Note: only delete disk we want to restore + # other volumes will become unused + if ($virtdev_hash->{$ds}) { + eval { PVE::Storage::vdisk_free($storecfg, $volid); }; + if (my $err = $@) { + warn $err; + } + } + }); + + # delete vmstate files, after the restore we have no snapshots anymore + foreach my $snapname (keys %{$oldconf->{snapshots}}) { + my $snap = $oldconf->{snapshots}->{$snapname}; + if ($snap->{vmstate}) { + eval { PVE::Storage::vdisk_free($storecfg, $snap->{vmstate}); }; + if (my $err = $@) { + warn $err; + } + } + } +}; + +# Helper to parse vzdump backup device hints +# +# $rpcenv: Environment, used to ckeck storage permissions +# $user: User ID, to check storage permissions +# $storecfg: Storage configuration +# $fh: the file handle for reading the configuration +# $devinfo: should contain device sizes for all 
backu-up'ed devices +# $options: backup options (pool, default storage) +# +# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid) +my $parse_backup_hints = sub { + my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_; + + my $virtdev_hash = {}; + + while (defined(my $line = <$fh>)) { + if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) { + my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4); + die "archive does not contain data for drive '$virtdev'\n" + if !$devinfo->{$devname}; + + if (defined($options->{storage})) { + $storeid = $options->{storage} || 'local'; + } elsif (!$storeid) { + $storeid = 'local'; + } + $format = 'raw' if !$format; + $devinfo->{$devname}->{devname} = $devname; + $devinfo->{$devname}->{virtdev} = $virtdev; + $devinfo->{$devname}->{format} = $format; + $devinfo->{$devname}->{storeid} = $storeid; + + # check permission on storage + my $pool = $options->{pool}; # todo: do we need that? + if ($user ne 'root@pam') { + $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']); + } + + $virtdev_hash->{$virtdev} = $devinfo->{$devname}; + } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) { + my $virtdev = $1; + my $drive = parse_drive($virtdev, $2); + if (drive_is_cloudinit($drive)) { + my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file}); + $storeid = $options->{storage} if defined ($options->{storage}); + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback + + $virtdev_hash->{$virtdev} = { + format => $format, + storeid => $storeid, + size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE, + is_cloudinit => 1, + }; + } + } + } + + return $virtdev_hash; +}; + +# Helper to allocate and activate all volumes required for a restore +# +# $storecfg: Storage configuration +# $virtdev_hash: as returned by parse_backup_hints() +# +# Returns: { $virtdev => $volid } +my 
$restore_allocate_devices = sub { + my ($storecfg, $virtdev_hash, $vmid) = @_; + + my $map = {}; + foreach my $virtdev (sort keys %$virtdev_hash) { + my $d = $virtdev_hash->{$virtdev}; + my $alloc_size = int(($d->{size} + 1024 - 1)/1024); + my $storeid = $d->{storeid}; + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + + # test if requested format is supported + my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); + my $supported = grep { $_ eq $d->{format} } @$validFormats; + $d->{format} = $defFormat if !$supported; + + my $name; + if ($d->{is_cloudinit}) { + $name = "vm-$vmid-cloudinit"; + $name .= ".$d->{format}" if $d->{format} ne 'raw'; + } + + my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size); + + print STDERR "new volume ID is '$volid'\n"; + $d->{volid} = $volid; + + PVE::Storage::activate_volumes($storecfg, [$volid]); + + $map->{$virtdev} = $volid; + } + + return $map; +}; + +my $restore_update_config_line = sub { my ($outfd, $cookie, $vmid, $map, $line, $unique) = @_; return if $line =~ m/^\#qmdump\#/; @@ -6022,7 +5751,37 @@ sub restore_update_config_line { } else { print $outfd $line; } -} +}; + +my $restore_deactivate_volumes = sub { + my ($storecfg, $devinfo) = @_; + + my $vollist = []; + foreach my $devname (keys %$devinfo) { + my $volid = $devinfo->{$devname}->{volid}; + push @$vollist, $volid if $volid; + } + + PVE::Storage::deactivate_volumes($storecfg, $vollist); +}; + +my $restore_destroy_volumes = sub { + my ($storecfg, $devinfo) = @_; + + foreach my $devname (keys %$devinfo) { + my $volid = $devinfo->{$devname}->{volid}; + next if !$volid; + eval { + if ($volid =~ m|^/|) { + unlink $volid || die 'unlink failed\n'; + } else { + PVE::Storage::vdisk_free($storecfg, $volid); + } + print STDERR "temporary volume '$volid' sucessfuly removed\n"; + }; + print STDERR "unable to cleanup '$volid' - $@" if $@; + } +}; sub scan_volids { my ($cfg, $vmid) = 
@_; @@ -6041,73 +5800,11 @@ sub scan_volids { return $volid_hash; } -sub is_volume_in_use { - my ($storecfg, $conf, $skip_drive, $volid) = @_; - - my $path = PVE::Storage::path($storecfg, $volid); - - my $scan_config = sub { - my ($cref, $snapname) = @_; - - foreach my $key (keys %$cref) { - my $value = $cref->{$key}; - if (is_valid_drivename($key)) { - next if $skip_drive && $key eq $skip_drive; - my $drive = parse_drive($key, $value); - next if !$drive || !$drive->{file} || drive_is_cdrom($drive); - return 1 if $volid eq $drive->{file}; - if ($drive->{file} =~ m!^/!) { - return 1 if $drive->{file} eq $path; - } else { - my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1); - next if !$storeid; - my $scfg = PVE::Storage::storage_config($storecfg, $storeid, 1); - next if !$scfg; - return 1 if $path eq PVE::Storage::path($storecfg, $drive->{file}, $snapname); - } - } - } - - return 0; - }; - - return 1 if &$scan_config($conf); - - undef $skip_drive; - - foreach my $snapname (keys %{$conf->{snapshots}}) { - return 1 if &$scan_config($conf->{snapshots}->{$snapname}, $snapname); - } - - return 0; -} - -sub update_disksize { - my ($drive, $volid_hash) = @_; - - my $volid = $drive->{file}; - return undef if !defined($volid); - - my $oldsize = $drive->{size}; - my $newsize = $volid_hash->{$volid}->{size}; - - if (defined($newsize) && defined($oldsize) && $newsize != $oldsize) { - $drive->{size} = $newsize; - - my $old_fmt = PVE::JSONSchema::format_size($oldsize); - my $new_fmt = PVE::JSONSchema::format_size($newsize); - - return wantarray ? 
($drive, $old_fmt, $new_fmt) : $drive; - } - - return undef; -} - sub update_disk_config { my ($vmid, $conf, $volid_hash) = @_; my $changes; - my $prefix = "VM $vmid:"; + my $prefix = "VM $vmid"; # used and unused disks my $referenced = {}; @@ -6119,35 +5816,37 @@ sub update_disk_config { my $referencedpath = {}; # update size info - foreach my $opt (keys %$conf) { - if (is_valid_drivename($opt)) { - my $drive = parse_drive($opt, $conf->{$opt}); - my $volid = $drive->{file}; - next if !$volid; - - # mark volid as "in-use" for next step - $referenced->{$volid} = 1; - if ($volid_hash->{$volid} && - (my $path = $volid_hash->{$volid}->{path})) { - $referencedpath->{$path} = 1; - } + PVE::QemuConfig->foreach_volume($conf, sub { + my ($opt, $drive) = @_; - next if drive_is_cdrom($drive); - next if !$volid_hash->{$volid}; + my $volid = $drive->{file}; + return if !$volid; - my ($updated, $old_size, $new_size) = update_disksize($drive, $volid_hash); - if (defined($updated)) { - $changes = 1; - $conf->{$opt} = print_drive($updated); - print "$prefix size of disk '$volid' ($opt) updated from $old_size to $new_size\n"; - } + # mark volid as "in-use" for next step + $referenced->{$volid} = 1; + if ($volid_hash->{$volid} && + (my $path = $volid_hash->{$volid}->{path})) { + $referencedpath->{$path} = 1; } - } + + return if drive_is_cdrom($drive); + return if !$volid_hash->{$volid}; + + my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volid_hash->{$volid}->{size}); + if (defined($updated)) { + $changes = 1; + $conf->{$opt} = print_drive($updated); + print "$prefix ($opt): $msg\n"; + } + }); # remove 'unusedX' entry if volume is used - foreach my $opt (keys %$conf) { - next if $opt !~ m/^unused\d+$/; - my $volid = $conf->{$opt}; + PVE::QemuConfig->foreach_unused_volume($conf, sub { + my ($opt, $drive) = @_; + + my $volid = $drive->{file}; + return if !$volid; + my $path = $volid_hash->{$volid}->{path} if $volid_hash->{$volid}; if ($referenced->{$volid} || 
($path && $referencedpath->{$path})) { print "$prefix remove entry '$opt', its volume '$volid' is in use\n"; @@ -6157,7 +5856,7 @@ sub update_disk_config { $referenced->{$volid} = 1; $referencedpath->{$path} = 1 if $path; - } + }); foreach my $volid (sort keys %$volid_hash) { next if $volid =~ m/vm-$vmid-state-/; @@ -6224,6 +5923,179 @@ sub rescan { } } +sub restore_proxmox_backup_archive { + my ($archive, $vmid, $user, $options) = @_; + + my $storecfg = PVE::Storage::config(); + + my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive); + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + + my $server = $scfg->{server}; + my $datastore = $scfg->{datastore}; + my $username = $scfg->{username} // 'root@pam'; + my $fingerprint = $scfg->{fingerprint}; + + my $repo = "$username\@$server:$datastore"; + + # This is only used for `pbs-restore`! + my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid); + local $ENV{PBS_PASSWORD} = $password; + local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint); + + my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) = + PVE::Storage::parse_volname($storecfg, $archive); + + die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup'; + + die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm'; + + my $tmpdir = "/var/tmp/vzdumptmp$$"; + rmtree $tmpdir; + mkpath $tmpdir; + + my $conffile = PVE::QemuConfig->config_file($vmid); + my $tmpfn = "$conffile.$$.tmp"; + # disable interrupts (always do cleanups) + local $SIG{INT} = + local $SIG{TERM} = + local $SIG{QUIT} = + local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; }; + + # Note: $oldconf is undef if VM does not exists + my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid); + my $oldconf = PVE::Cluster::cfs_read_file($cfs_path); + + my $rpcenv = PVE::RPCEnvironment::get(); + my $devinfo = {}; + + eval { + # enable interrupts + local $SIG{INT} = + local $SIG{TERM} = + local $SIG{QUIT} = + 
local $SIG{HUP} = + local $SIG{PIPE} = sub { die "interrupted by signal\n"; }; + + my $cfgfn = "$tmpdir/qemu-server.conf"; + my $firewall_config_fn = "$tmpdir/fw.conf"; + my $index_fn = "$tmpdir/index.json"; + + my $cmd = "restore"; + + my $param = [$pbs_backup_name, "index.json", $index_fn]; + PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param); + my $index = PVE::Tools::file_get_contents($index_fn); + $index = decode_json($index); + + # print Dumper($index); + foreach my $info (@{$index->{files}}) { + if ($info->{filename} =~ m/^(drive-\S+).img.fidx$/) { + my $devname = $1; + if ($info->{size} =~ m/^(\d+)$/) { # untaint size + $devinfo->{$devname}->{size} = $1; + } else { + die "unable to parse file size in 'index.json' - got '$info->{size}'\n"; + } + } + } + + my $is_qemu_server_backup = scalar(grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}); + if (!$is_qemu_server_backup) { + die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n"; + } + my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}}); + + $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn]; + PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param); + + if ($has_firewall_config) { + $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn]; + PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param); + + my $pve_firewall_dir = '/etc/pve/firewall'; + mkdir $pve_firewall_dir; # make sure the dir exists + PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw"); + } + + my $fh = IO::File->new($cfgfn, "r") || + "unable to read qemu-server.conf - $!\n"; + + my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options); + + # fixme: rate limit? 
+ + # create empty/temp config + PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create"); + + $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf; + + # allocate volumes + my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid); + + foreach my $virtdev (sort keys %$virtdev_hash) { + my $d = $virtdev_hash->{$virtdev}; + next if $d->{is_cloudinit}; # no need to restore cloudinit + + my $volid = $d->{volid}; + + my $path = PVE::Storage::path($storecfg, $volid); + + # This is the ONLY user of the PBS_ env vars set on top of this function! + my $pbs_restore_cmd = [ + '/usr/bin/pbs-restore', + '--repository', $repo, + $pbs_backup_name, + "$d->{devname}.img.fidx", + $path, + '--verbose', + ]; + + push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format}; + + if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) { + push @$pbs_restore_cmd, '--skip-zero'; + } + + my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd); + print "restore proxmox backup image: $dbg_cmdstring\n"; + run_command($pbs_restore_cmd); + } + + $fh->seek(0, 0) || die "seek failed - $!\n"; + + my $outfd = new IO::File ($tmpfn, "w") || + die "unable to write config for VM $vmid\n"; + + my $cookie = { netcount => 0 }; + while (defined(my $line = <$fh>)) { + $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $options->{unique}); + } + + $fh->close(); + $outfd->close(); + }; + my $err = $@; + + $restore_deactivate_volumes->($storecfg, $devinfo); + + rmtree $tmpdir; + + if ($err) { + unlink $tmpfn; + $restore_destroy_volumes->($storecfg, $devinfo); + die $err; + } + + rename($tmpfn, $conffile) || + die "unable to commit configuration file '$conffile'\n"; + + PVE::Cluster::cfs_update(); # make sure we read new file + + eval { rescan($vmid, 1); }; + warn $@ if $@; +} + sub restore_vma_archive { my ($archive, $vmid, $user, $opts, $comp) = @_; @@ -6261,14 +6133,9 @@ sub restore_vma_archive { } if ($comp) { - 
my $cmd; - if ($comp eq 'gzip') { - $cmd = ['zcat', $readfrom]; - } elsif ($comp eq 'lzop') { - $cmd = ['lzop', '-d', '-c', $readfrom]; - } else { - die "unknown compression method '$comp'\n"; - } + my $info = PVE::Storage::decompressor_info('vma', $comp); + my $cmd = $info->{decompressor}; + push @$cmd, $readfrom; $add_pipe->($cmd); } @@ -6308,8 +6175,6 @@ sub restore_vma_archive { my %storage_limits; my $print_devmap = sub { - my $virtdev_hash = {}; - my $cfgfn = "$tmpdir/qemu-server.conf"; # we can read the config - that is already extracted @@ -6323,51 +6188,7 @@ sub restore_vma_archive { PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw"); } - while (defined(my $line = <$fh>)) { - if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) { - my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4); - die "archive does not contain data for drive '$virtdev'\n" - if !$devinfo->{$devname}; - if (defined($opts->{storage})) { - $storeid = $opts->{storage} || 'local'; - } elsif (!$storeid) { - $storeid = 'local'; - } - $format = 'raw' if !$format; - $devinfo->{$devname}->{devname} = $devname; - $devinfo->{$devname}->{virtdev} = $virtdev; - $devinfo->{$devname}->{format} = $format; - $devinfo->{$devname}->{storeid} = $storeid; - - # check permission on storage - my $pool = $opts->{pool}; # todo: do we need that? 
- if ($user ne 'root@pam') { - $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']); - } - - $storage_limits{$storeid} = $bwlimit; - - $virtdev_hash->{$virtdev} = $devinfo->{$devname}; - } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) { - my $virtdev = $1; - my $drive = parse_drive($virtdev, $2); - if (drive_is_cloudinit($drive)) { - my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file}); - my $scfg = PVE::Storage::storage_config($cfg, $storeid); - my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback - - my $d = { - format => $format, - storeid => $opts->{storage} // $storeid, - size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE, - file => $drive->{file}, # to make drive_is_cloudinit check possible - name => "vm-$vmid-cloudinit", - is_cloudinit => 1, - }; - $virtdev_hash->{$virtdev} = $d; - } - } - } + my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts); foreach my $key (keys %storage_limits) { my $limit = PVE::Storage::get_bandwidth_limit('restore', [$key], $bwlimit); @@ -6384,81 +6205,35 @@ sub restore_vma_archive { # create empty/temp config if ($oldconf) { PVE::Tools::file_set_contents($conffile, "memory: 128\n"); - foreach_drive($oldconf, sub { - my ($ds, $drive) = @_; - - return if drive_is_cdrom($drive, 1); - - my $volid = $drive->{file}; - return if !$volid || $volid =~ m|^/|; - - my ($path, $owner) = PVE::Storage::path($cfg, $volid); - return if !$path || !$owner || ($owner != $vmid); - - # Note: only delete disk we want to restore - # other volumes will become unused - if ($virtdev_hash->{$ds}) { - eval { PVE::Storage::vdisk_free($cfg, $volid); }; - if (my $err = $@) { - warn $err; - } - } - }); - - # delete vmstate files, after the restore we have no snapshots anymore - foreach my $snapname (keys %{$oldconf->{snapshots}}) { - my $snap = $oldconf->{snapshots}->{$snapname}; - if ($snap->{vmstate}) { - eval { PVE::Storage::vdisk_free($cfg, $snap->{vmstate}); 
}; - if (my $err = $@) { - warn $err; - } - } - } + $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash); } - my $map = {}; + # allocate volumes + my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid); + + # print restore information to $fifofh foreach my $virtdev (sort keys %$virtdev_hash) { my $d = $virtdev_hash->{$virtdev}; - my $alloc_size = int(($d->{size} + 1024 - 1)/1024); + next if $d->{is_cloudinit}; # no need to restore cloudinit + my $storeid = $d->{storeid}; - my $scfg = PVE::Storage::storage_config($cfg, $storeid); + my $volid = $d->{volid}; my $map_opts = ''; if (my $limit = $storage_limits{$storeid}) { $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:"; } - # test if requested format is supported - my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($cfg, $storeid); - my $supported = grep { $_ eq $d->{format} } @$validFormats; - $d->{format} = $defFormat if !$supported; - - my $name; - if ($d->{is_cloudinit}) { - $name = $d->{name}; - $name .= ".$d->{format}" if $d->{format} ne 'raw'; - } - - my $volid = PVE::Storage::vdisk_alloc($cfg, $storeid, $vmid, $d->{format}, $name, $alloc_size); - print STDERR "new volume ID is '$volid'\n"; - $d->{volid} = $volid; - - PVE::Storage::activate_volumes($cfg, [$volid]); - my $write_zeros = 1; if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) { $write_zeros = 0; } - if (!$d->{is_cloudinit}) { - my $path = PVE::Storage::path($cfg, $volid); + my $path = PVE::Storage::path($cfg, $volid); - print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n"; + print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n"; - print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n"; - } - $map->{$virtdev} = $volid; + print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n"; } $fh->seek(0, 0) || die "seek failed - $!\n"; @@ -6468,7 +6243,7 @@ sub restore_vma_archive { my $cookie = { 
netcount => 0 }; while (defined(my $line = <$fh>)) { - restore_update_config_line($outfd, $cookie, $vmid, $map, $line, $opts->{unique}); + $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $opts->{unique}); } $fh->close(); @@ -6515,38 +6290,17 @@ sub restore_vma_archive { alarm($oldtimeout) if $oldtimeout; - my $vollist = []; - foreach my $devname (keys %$devinfo) { - my $volid = $devinfo->{$devname}->{volid}; - push @$vollist, $volid if $volid; - } - - PVE::Storage::deactivate_volumes($cfg, $vollist); + $restore_deactivate_volumes->($cfg, $devinfo); unlink $mapfifo; + rmtree $tmpdir; if ($err) { - rmtree $tmpdir; unlink $tmpfn; - - foreach my $devname (keys %$devinfo) { - my $volid = $devinfo->{$devname}->{volid}; - next if !$volid; - eval { - if ($volid =~ m|^/|) { - unlink $volid || die 'unlink failed\n'; - } else { - PVE::Storage::vdisk_free($cfg, $volid); - } - print STDERR "temporary volume '$volid' sucessfuly removed\n"; - }; - print STDERR "unable to cleanup '$volid' - $@" if $@; - } + $restore_destroy_volumes->($cfg, $devinfo); die $err; } - rmtree $tmpdir; - rename($tmpfn, $conffile) || die "unable to commit configuration file '$conffile'\n"; @@ -6643,7 +6397,7 @@ sub restore_tar_archive { my $cookie = { netcount => 0 }; while (defined (my $line = <$srcfd>)) { - restore_update_config_line($outfd, $cookie, $vmid, $map, $line, $opts->{unique}); + $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $opts->{unique}); } $srcfd->close(); @@ -6671,7 +6425,7 @@ sub foreach_storage_used_by_vm { my $sidhash = {}; - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; return if drive_is_cdrom($drive); @@ -6722,7 +6476,7 @@ sub template_create { my $storecfg = PVE::Storage::config(); - foreach_drive($conf, sub { + PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; return if drive_is_cdrom($drive); @@ -6778,7 +6532,7 @@ sub qemu_img_convert { $cachemode = 'none' if 
$src_scfg->{type} eq 'zfspool'; } elsif (-f $src_volid) { $src_path = $src_volid; - if ($src_path =~ m/\.($QEMU_FORMAT_RE)$/) { + if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) { $src_format = $1; } } @@ -6839,7 +6593,7 @@ sub qemu_img_convert { sub qemu_img_format { my ($scfg, $volname) = @_; - if ($scfg->{path} && $volname =~ m/\.($QEMU_FORMAT_RE)$/) { + if ($scfg->{path} && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) { return $1; } else { return "raw"; @@ -6847,7 +6601,7 @@ sub qemu_img_format { } sub qemu_drive_mirror { - my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete, $qga, $bwlimit) = @_; + my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_; $jobs = {} if !$jobs; @@ -6874,6 +6628,12 @@ sub qemu_drive_mirror { my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $qemu_target }; $opts->{format} = $format if $format; + if (defined($src_bitmap)) { + $opts->{sync} = 'incremental'; + $opts->{bitmap} = $src_bitmap; + print "drive mirror re-using dirty bitmap '$src_bitmap'\n"; + } + if (defined($bwlimit)) { $opts->{speed} = $bwlimit * 1024; print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n"; @@ -6889,11 +6649,17 @@ sub qemu_drive_mirror { die "mirroring error: $err\n"; } - qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete, $qga); + qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga); } +# $completion can be either +# 'complete': wait until all jobs are ready, block-job-complete them (default) +# 'cancel': wait until all jobs are ready, block-job-cancel them +# 'skip': wait until all jobs are ready, return with block jobs in ready state sub qemu_drive_mirror_monitor { - my ($vmid, $vmiddst, $jobs, $skipcomplete, $qga) = @_; + my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_; + + $completion //= 'complete'; eval 
{ my $err_complete = 0; @@ -6938,7 +6704,7 @@ sub qemu_drive_mirror_monitor { if ($readycounter == scalar(keys %$jobs)) { print "all mirroring jobs are ready \n"; - last if $skipcomplete; #do the complete later + last if $completion eq 'skip'; #do the complete later if ($vmiddst && $vmiddst != $vmid) { my $agent_running = $qga && qga_check_running($vmid); @@ -6968,7 +6734,15 @@ sub qemu_drive_mirror_monitor { # try to switch the disk if source and destination are on the same guest print "$job: Completing block job...\n"; - eval { mon_cmd($vmid, "block-job-complete", device => $job) }; + my $op; + if ($completion eq 'complete') { + $op = 'block-job-complete'; + } elsif ($completion eq 'cancel') { + $op = 'block-job-cancel'; + } else { + die "invalid completion value: $completion\n"; + } + eval { mon_cmd($vmid, $op, device => $job) }; if ($@ =~ m/cannot be completed/) { print "$job: Block job cannot be completed, try again.\n"; $err_complete++; @@ -7024,7 +6798,7 @@ sub qemu_blockjobs_cancel { sub clone_disk { my ($storecfg, $vmid, $running, $drivename, $drive, $snapname, - $newvmid, $storage, $format, $full, $newvollist, $jobs, $skipcomplete, $qga, $bwlimit) = @_; + $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_; my $newvolid; @@ -7047,6 +6821,8 @@ sub clone_disk { $name .= ".$dst_format" if $dst_format ne 'raw'; $snapname = undef; $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE; + } elsif ($drivename eq 'efidisk0') { + $size = get_efivars_size($conf); } $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)); push @$newvollist, $newvolid; @@ -7060,7 +6836,16 @@ sub clone_disk { my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid); if (!$running || $snapname) { # TODO: handle bwlimits - qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit); + if ($drivename eq 'efidisk0') { + # the relevant data on the efidisk 
may be smaller than the source + # e.g. on RBD/ZFS, so we use dd to copy only the amount + # that is given by the OVMF_VARS.fd + my $src_path = PVE::Storage::path($storecfg, $drive->{file}); + my $dst_path = PVE::Storage::path($storecfg, $newvolid); + run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=1", "count=$size", "if=$src_path", "of=$dst_path"]); + } else { + qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit); + } } else { my $kvmver = get_running_qemu_version ($vmid); @@ -7069,7 +6854,7 @@ sub clone_disk { if $drive->{iothread}; } - qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs, $skipcomplete, $qga, $bwlimit); + qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs, $completion, $qga, $bwlimit); } } @@ -7112,6 +6897,26 @@ sub qemu_use_old_bios_files { return ($use_old_bios_files, $machine_type); } +sub get_efivars_size { + my ($conf) = @_; + my $arch = get_vm_arch($conf); + my (undef, $ovmf_vars) = get_ovmf_files($arch); + die "uefi vars image '$ovmf_vars' not found\n" if ! 
-f $ovmf_vars; + return -s $ovmf_vars; +} + +sub update_efidisk_size { + my ($conf) = @_; + + return if !defined($conf->{efidisk0}); + + my $disk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0}); + $disk->{size} = get_efivars_size($conf); + $conf->{efidisk0} = print_drive($disk); + + return; +} + sub create_efidisk($$$$$) { my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_; @@ -7161,45 +6966,6 @@ sub scsihw_infos { return ($maxdev, $controller, $controller_prefix); } -sub add_hyperv_enlightenments { - my ($cpuFlags, $winversion, $machine_version, $bios, $gpu_passthrough, $hv_vendor_id) = @_; - - return if $winversion < 6; - return if $bios && $bios eq 'ovmf' && $winversion < 8; - - if ($gpu_passthrough || defined($hv_vendor_id)) { - $hv_vendor_id //= 'proxmox'; - push @$cpuFlags , "hv_vendor_id=$hv_vendor_id"; - } - - if (min_version($machine_version, 2, 3)) { - push @$cpuFlags , 'hv_spinlocks=0x1fff'; - push @$cpuFlags , 'hv_vapic'; - push @$cpuFlags , 'hv_time'; - } else { - push @$cpuFlags , 'hv_spinlocks=0xffff'; - } - - if (min_version($machine_version, 2, 6)) { - push @$cpuFlags , 'hv_reset'; - push @$cpuFlags , 'hv_vpindex'; - push @$cpuFlags , 'hv_runtime'; - } - - if ($winversion >= 7) { - push @$cpuFlags , 'hv_relaxed'; - - if (min_version($machine_version, 2, 12)) { - push @$cpuFlags , 'hv_synic'; - push @$cpuFlags , 'hv_stimer'; - } - - if (min_version($machine_version, 3, 1)) { - push @$cpuFlags , 'hv_ipi'; - } - } -} - sub windows_version { my ($ostype) = @_; @@ -7238,19 +7004,6 @@ sub resolve_dst_disk_format { return $format; } -sub resolve_first_disk { - my $conf = shift; - my @disks = PVE::QemuServer::valid_drive_names(); - my $firstdisk; - foreach my $ds (reverse @disks) { - next if !$conf->{$ds}; - my $disk = PVE::QemuServer::parse_drive($ds, $conf->{$ds}); - next if PVE::QemuServer::drive_is_cdrom($disk); - $firstdisk = $ds; - } - return $firstdisk; -} - # NOTE: if this logic changes, please update docs & possibly gui logic sub 
find_vmstate_storage { my ($conf, $storecfg) = @_; @@ -7329,7 +7082,7 @@ sub complete_backup_archives { my $res = []; foreach my $id (keys %$data) { foreach my $item (@{$data->{$id}}) { - next if $item->{format} !~ m/^vma\.(gz|lzo)$/; + next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/; push @$res, $item->{volid} if defined($item->{volid}); } }