use IO::Socket::UNIX;
use IPC::Open3;
use JSON;
+use List::Util qw(first);
use MIME::Base64;
use POSIX;
use Storable qw(dclone);
-use Time::HiRes qw(gettimeofday);
+use Time::HiRes qw(gettimeofday usleep);
use URI::Escape;
use UUID;
use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
+use PVE::CGroup;
+use PVE::CpuSet;
use PVE::DataCenterConfig;
use PVE::Exception qw(raise raise_param_exc);
+use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
+use PVE::HA::Config;
+use PVE::Mapping::PCI;
+use PVE::Mapping::USB;
use PVE::INotify;
-use PVE::JSONSchema qw(get_standard_option);
+use PVE::JSONSchema qw(get_standard_option parse_property_string);
use PVE::ProcFSTools;
+use PVE::PBSClient;
+use PVE::RESTEnvironment qw(log_warn);
use PVE::RPCEnvironment;
use PVE::Storage;
use PVE::SysFSTools;
use PVE::QMPClient;
use PVE::QemuConfig;
-use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
+use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version);
use PVE::QemuServer::Cloudinit;
-use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
-use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom parse_drive print_drive);
+use PVE::QemuServer::CGroup;
+use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options get_cpu_bitness is_native_arch);
+use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
use PVE::QemuServer::Machine;
-use PVE::QemuServer::Memory;
+use PVE::QemuServer::Memory qw(get_current_memory);
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
-use PVE::QemuServer::USB qw(parse_usb_device);
+use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
+use PVE::QemuServer::USB;
my $have_sdn;
eval {
require PVE::Network::SDN::Zones;
+ require PVE::Network::SDN::Vnets;
$have_sdn = 1;
};
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
my $OVMF = {
- x86_64 => [
- "$EDK2_FW_BASE/OVMF_CODE.fd",
- "$EDK2_FW_BASE/OVMF_VARS.fd"
- ],
- aarch64 => [
- "$EDK2_FW_BASE/AAVMF_CODE.fd",
- "$EDK2_FW_BASE/AAVMF_VARS.fd"
- ],
+ x86_64 => {
+ '4m-no-smm' => [
+ "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
+ "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
+ ],
+ '4m-no-smm-ms' => [
+ "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
+ "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
+ ],
+ '4m' => [
+ "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
+ "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
+ ],
+ '4m-ms' => [
+ "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
+ "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
+ ],
+ # FIXME: These are legacy 2MB-sized images that modern OVMF no longer supports building.
+ # How can we deprecate this sanely without breaking existing instances, or the use of
+ # older backups and snapshots?
+ default => [
+ "$EDK2_FW_BASE/OVMF_CODE.fd",
+ "$EDK2_FW_BASE/OVMF_VARS.fd",
+ ],
+ },
+ aarch64 => {
+ default => [
+ "$EDK2_FW_BASE/AAVMF_CODE.fd",
+ "$EDK2_FW_BASE/AAVMF_VARS.fd",
+ ],
+ },
};
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
-# Note about locking: we use flock on the config file protect
-# against concurent actions.
-# Aditionaly, we have a 'lock' setting in the config file. This
-# can be set to 'migrate', 'backup', 'snapshot' or 'rollback'. Most actions are not
-# allowed when such lock is set. But you can ignore this kind of
-# lock with the --skiplock flag.
+# Note about locking: we use flock on the config file to protect against concurrent actions.
+# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
+# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
+# But you can ignore this kind of lock with the --skiplock flag.
-cfs_register_file('/qemu-server/',
- \&parse_vm_config,
- \&write_vm_config);
+cfs_register_file(
+ '/qemu-server/',
+ \&parse_vm_config,
+ \&write_vm_config
+);
PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
description => "Some command save/restore state from this location.",
optional => 1,
});
-PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
- description => "Specifies the Qemu machine type.",
- type => 'string',
- pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
- maxLength => 40,
- optional => 1,
-});
-
-
-sub map_storage {
- my ($map, $source) = @_;
-
- return $source if !defined($map);
-
- return $map->{entries}->{$source}
- if $map->{entries} && defined($map->{entries}->{$source});
-
- return $map->{default} if $map->{default};
-
- # identity (fallback)
- return $source;
-}
-
-PVE::JSONSchema::register_standard_option('pve-targetstorage', {
- description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
- type => 'string',
- format => 'storagepair-list',
- optional => 1,
-});
-
-#no warnings 'redefine';
-
-sub cgroups_write {
- my ($controller, $vmid, $option, $value) = @_;
-
- my $path = "/sys/fs/cgroup/$controller/qemu.slice/$vmid.scope/$option";
- PVE::ProcFSTools::write_proc_entry($path, $value);
-
-}
-
+# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;
sub nodename {
$nodename_cache //= PVE::INotify::nodename();
my $agent_fmt = {
enabled => {
- description => "Enable/disable Qemu GuestAgent.",
+ description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
type => 'boolean',
default => 0,
default_key => 1,
},
fstrim_cloned_disks => {
- description => "Run fstrim after cloning/moving a disk.",
+ description => "Run fstrim after moving a disk or migrating the VM.",
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ },
+ 'freeze-fs-on-backup' => {
+ description => "Freeze/thaw guest filesystems on backup for consistency.",
type => 'boolean',
optional => 1,
- default => 0
+ default => 1,
},
type => {
description => "Select the agent type",
default => 'std',
optional => 1,
default_key => 1,
- enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)],
+ enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
},
memory => {
description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
minimum => 4,
maximum => 512,
},
+ clipboard => {
+ description => 'Enable a specific clipboard. If not set, depending on the display type the'
+ .' SPICE one will be added. Migration with VNC clipboard is not yet supported!',
+ type => 'string',
+ enum => ['vnc'],
+ optional => 1,
+ },
};
my $ivshmem_fmt = {
},
driver => {
type => 'string',
- enum => ['spice'],
+ enum => ['spice', 'none'],
default => 'spice',
optional => 1,
description => "Driver backend for the audio device."
type => 'string',
enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
default_key => 1,
- description => "The file on the host to gather entropy from. In most"
- . " cases /dev/urandom should be preferred over /dev/random"
- . " to avoid entropy-starvation issues on the host. Using"
- . " urandom does *not* decrease security in any meaningful"
- . " way, as it's still seeded from real entropy, and the"
- . " bytes provided will most likely be mixed with real"
- . " entropy on the guest as well. /dev/hwrng can be used"
- . " to pass through a hardware RNG from the host.",
+ description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
+ ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
+ ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
+ ." still seeded from real entropy, and the bytes provided will most likely be mixed"
+ ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
+ ." a hardware RNG from the host.",
},
max_bytes => {
type => 'integer',
- description => "Maximum bytes of entropy injected into the guest every"
- . " 'period' milliseconds. Prefer a lower value when using"
- . " /dev/random as source. Use 0 to disable limiting"
- . " (potentially dangerous!).",
+ description => "Maximum bytes of entropy allowed to get injected into the guest every"
+ ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
+ ." `0` to disable limiting (potentially dangerous!).",
optional => 1,
- # default is 1 KiB/s, provides enough entropy to the guest to avoid
- # boot-starvation issues (e.g. systemd etc...) while allowing no chance
- # of overwhelming the host, provided we're reading from /dev/urandom
+ # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
+ # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
+ # reading from /dev/urandom
default => 1024,
},
period => {
type => 'integer',
- description => "Every 'period' milliseconds the entropy-injection quota"
- . " is reset, allowing the guest to retrieve another"
- . " 'max_bytes' of entropy.",
+ description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
+ ." the guest to retrieve another 'max_bytes' of entropy.",
optional => 1,
default => 1000,
},
};
+my $meta_info_fmt = { # schema of the sub-properties accepted in a 'meta' value; both are optional
+ 'ctime' => {
+ type => 'integer',
+ description => "The guest creation timestamp as UNIX epoch time",
+ minimum => 0, # negative timestamps are rejected
+ optional => 1,
+ },
+ 'creation-qemu' => {
+ type => 'string',
+ description => "The QEMU (machine) version from the time this VM was created.",
+ pattern => '\d+(\.\d+)+', # dot-separated numeric components, at least two (e.g. 7.1)
+ optional => 1,
+ },
+};
+
my $confdesc = {
onboot => {
optional => 1,
default => 0,
},
hotplug => {
- optional => 1,
- type => 'string', format => 'pve-hotplug-features',
- description => "Selectively enable hotplug features. This is a comma separated list of hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable hotplug completely. Value '1' is an alias for the default 'network,disk,usb'.",
+ optional => 1,
+ type => 'string', format => 'pve-hotplug-features',
+ description => "Selectively enable hotplug features. This is a comma separated list of"
+ ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
+ ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
+ ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
+ ." windows > 7.",
default => 'network,disk,usb',
},
reboot => {
optional => 1,
type => 'number',
description => "Limit of CPU usage.",
- verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
+ verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
+ ." total of '2' CPU time. Value '0' indicates no CPU limit.",
minimum => 0,
maximum => 128,
- default => 0,
+ default => 0,
},
cpuunits => {
optional => 1,
type => 'integer',
- description => "CPU weight for a VM.",
- verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.",
- minimum => 2,
+ description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
+ verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
+ ." The larger the number is, the more CPU time this VM gets. Number is relative to"
+ ." weights of all the other running VMs.",
+ minimum => 1,
maximum => 262144,
- default => 1024,
+ default => 'cgroup v1: 1024, cgroup v2: 100',
},
memory => {
optional => 1,
- type => 'integer',
- description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
- minimum => 16,
- default => 512,
+ type => 'string',
+ description => "Memory properties.",
+ format => $PVE::QemuServer::Memory::memory_fmt
},
balloon => {
- optional => 1,
- type => 'integer',
- description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
+ optional => 1,
+ type => 'integer',
+ description => "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
minimum => 0,
},
shares => {
- optional => 1,
- type => 'integer',
- description => "Amount of memory shares for auto-ballooning. The larger the number is, the more memory this VM gets. Number is relative to weights of all other running VMs. Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
+ optional => 1,
+ type => 'integer',
+ description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
+ ." more memory this VM gets. Number is relative to weights of all other running VMs."
+ ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
minimum => 0,
maximum => 50000,
default => 1000,
keyboard => {
optional => 1,
type => 'string',
- description => "Keybord layout for vnc server. Default is read from the '/etc/pve/datacenter.cfg' configuration file.".
- "It should not be necessary to set it.",
+ description => "Keyboard layout for VNC server. This option is generally not required and"
+ ." is often better handled from within the guest OS.",
enum => PVE::Tools::kvmkeymaplist(),
default => undef,
},
description => {
optional => 1,
type => 'string',
- description => "Description for the VM. Only used on the configuration web interface. This is saved as comment inside the configuration file.",
+ description => "Description for the VM. Shown in the web-interface VM's summary."
+ ." This is saved as comment inside the configuration file.",
+ maxLength => 1024 * 8,
},
ostype => {
optional => 1,
type => 'string',
- enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 l24 l26 solaris)],
+ # NOTE: When extending, also consider extending `%guest_types` in `Import/ESXi.pm`.
+ enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
description => "Specify guest operating system.",
verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
-win10;; Microsoft Windows 10/2016
+win10;; Microsoft Windows 10/2016/2019
+win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
-l26;; Linux 2.6 - 5.X Kernel
+l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiania kernel
EODESC
},
boot => {
optional => 1,
- type => 'string',
- description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n).",
- pattern => '[acdn]{1,4}',
- default => 'cdn',
+ type => 'string', format => 'pve-qm-boot',
+ description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
+ ." key or 'legacy=' is deprecated.",
},
bootdisk => {
optional => 1,
type => 'string', format => 'pve-qm-bootdisk',
- description => "Enable booting from specified disk.",
+ description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
pattern => '(ide|sata|scsi|virtio)\d+',
},
smp => {
description => "Enable/disable hugepages memory.",
enum => [qw(any 2 1024)],
},
+ # Companion flag to 'hugepages' (see description below).
+ keephugepages => {
+ optional => 1,
+ type => 'boolean',
+ default => 0,
+ description => "Use together with hugepages. If enabled, hugepages will not be deleted"
+ ." after VM shutdown and can be used for subsequent starts.",
+ },
vcpus => {
optional => 1,
type => 'integer',
},
agent => {
optional => 1,
- description => "Enable/disable Qemu GuestAgent and its properties.",
+ description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
type => 'string',
format => $agent_fmt,
},
localtime => {
optional => 1,
type => 'boolean',
- description => "Set the real time clock to local time. This is enabled by default if ostype indicates a Microsoft OS.",
+ description => "Set the real time clock (RTC) to local time. This is enabled by default if"
+ ." the `ostype` indicates a Microsoft Windows OS.",
},
freeze => {
optional => 1,
optional => 1,
type => 'string', format => $vga_fmt,
description => "Configure the VGA hardware.",
- verbose_description => "Configure the VGA Hardware. If you want to use ".
- "high resolution modes (>= 1280x1024x16) you may need to increase " .
- "the vga memory option. Since QEMU 2.9 the default VGA display type " .
- "is 'std' for all OS types besides some Windows versions (XP and " .
- "older) which use 'cirrus'. The 'qxl' option enables the SPICE " .
- "display server. For win* OS you can select how many independent " .
- "displays you want, Linux guests can add displays them self.\n".
- "You can also run without any graphic card, using a serial device as terminal.",
+ verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
+ ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
+ ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
+ ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
+ ." display server. For win* OS you can select how many independent displays you want,"
+ ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
+ ." using a serial device as terminal.",
},
watchdog => {
optional => 1,
type => 'string', format => 'pve-qm-watchdog',
description => "Create a virtual hardware watchdog device.",
- verbose_description => "Create a virtual hardware watchdog device. Once enabled" .
- " (by a guest action), the watchdog must be periodically polled " .
- "by an agent inside the guest or else the watchdog will reset " .
- "the guest (or execute the respective action specified)",
+ verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
+ ." action), the watchdog must be periodically polled by an agent inside the guest or"
+ ." else the watchdog will reset the guest (or execute the respective action specified)",
},
startdate => {
optional => 1,
type => 'string',
typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
- description => "Set the initial date of the real time clock. Valid format for date are: 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
+ # NOTE: continuation lines must start with the separating space, otherwise the joined
+ # text loses it (the wrapped version previously rendered as "are:'now'").
+ description => "Set the initial date of the real time clock. Valid format for date are:"
+ ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
default => 'now',
},
verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:
-args: -no-reboot -no-hpet
+args: -no-reboot -smbios 'type=0,vendor=FOO'
NOTE: this option is for experts only.
EODESCR
type => 'boolean',
default => 1,
description => "Enable/disable the USB tablet device.",
- verbose_description => "Enable/disable the USB tablet device. This device is " .
- "usually needed to allow absolute mouse positioning with VNC. " .
- "Else the mouse runs out of sync with normal VNC clients. " .
- "If you're running lots of console-only guests on one host, " .
- "you may consider disabling this to save some context switches. " .
- "This is turned off by default if you use spice (-vga=qxl).",
+ verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
+ ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
+ ." normal VNC clients. If you're running lots of console-only guests on one host, you"
+ ." may consider disabling this to save some context switches. This is turned off by"
+ ." default if you use spice (`qm set <vmid> --vga qxl`).",
},
migrate_speed => {
optional => 1,
vmstate => {
optional => 1,
type => 'string', format => 'pve-volume-id',
- description => "Reference to a volume which stores the VM state. This is used internally for snapshots.",
+ description => "Reference to a volume which stores the VM state. This is used internally"
+ ." for snapshots.",
},
vmstatestorage => get_standard_option('pve-storage-id', {
description => "Default storage for VM state volumes/files.",
optional => 1,
}),
runningmachine => get_standard_option('pve-qemu-machine', {
- description => "Specifies the QEMU machine type of the running vm. This is used internally for snapshots.",
+ description => "Specifies the QEMU machine type of the running vm. This is used internally"
+ ." for snapshots.",
}),
runningcpu => {
- description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used internally for snapshots.",
+ description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
+ ." internally for snapshots.",
optional => 1,
type => 'string',
pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
protection => {
optional => 1,
type => 'boolean',
- description => "Sets the protection flag of the VM. This will disable the remove VM and remove disk operations.",
+ description => "Sets the protection flag of the VM. This will disable the remove VM and"
+ ." remove disk operations.",
default => 0,
},
bios => {
type => 'string',
pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
format_description => 'UUID',
- description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0' to disable explicitly.",
- verbose_description => "The VM generation ID (vmgenid) device exposes a".
- " 128-bit integer value identifier to the guest OS. This allows to".
- " notify the guest operating system when the virtual machine is".
- " executed with a different configuration (e.g. snapshot execution".
- " or creation from a template). The guest operating system notices".
- " the change, and is then able to react as appropriate by marking".
- " its copies of distributed databases as dirty, re-initializing its".
- " random number generator, etc.\n".
- "Note that auto-creation only works when done throug API/CLI create".
- " or update methods, but not when manually editing the config file.",
+ description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
+ ." to disable explicitly.",
+ verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
+ ." value identifier to the guest OS. This allows to notify the guest operating system"
+ ." when the virtual machine is executed with a different configuration (e.g. snapshot"
+ ." execution or creation from a template). The guest operating system notices the"
+ ." change, and is then able to react as appropriate by marking its copies of"
+ ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
+ ."Note that auto-creation only works when done through API/CLI create or update methods"
+ .", but not when manually editing the config file.",
default => "1 (autogenerated)",
optional => 1,
},
ivshmem => {
type => 'string',
format => $ivshmem_fmt,
- description => "Inter-VM shared memory. Useful for direct communication between VMs, or to the host.",
+ description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
+ ." the host.",
optional => 1,
},
audio0 => {
description => "Configure a VirtIO-based Random Number Generator.",
optional => 1,
},
+ meta => {
+ type => 'string',
+ format => $meta_info_fmt,
+ description => "Some (read-only) meta-information about this guest.",
+ optional => 1,
+ },
+ affinity => {
+ type => 'string', format => 'pve-cpuset',
+ description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
+ optional => 1,
+ },
};
+# Sub-properties of the 'cicustom' option: each key takes a volume ID ('pve-volume-id') of a
+# custom file whose content is passed to the VM via cloud-init.
my $cicustom_fmt = {
meta => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all meta data passed to the VM via cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
+ # Both halves must use the same quote character: mixing ' and " here makes Perl read
+ # the line break and the '."' as literal text inside one single-quoted string.
+ description => 'Specify a custom file containing all meta data passed to the VM via'
+ .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
format => 'pve-volume-id',
format_description => 'volume',
},
network => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all network data passed to the VM via cloud-init.',
+ description => 'To pass a custom file containing all network data to the VM via cloud-init.',
format => 'pve-volume-id',
format_description => 'volume',
},
user => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all user data passed to the VM via cloud-init.',
+ description => 'To pass a custom file containing all user data to the VM via cloud-init.',
+ format => 'pve-volume-id',
+ format_description => 'volume',
+ },
+ vendor => {
+ type => 'string',
+ optional => 1,
+ description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
format => 'pve-volume-id',
format_description => 'volume',
},
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
+# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
my $confdesc_cloudinit = {
citype => {
optional => 1,
type => 'string',
- description => 'Specifies the cloud-init configuration format. The default depends on the configured operating system type (`ostype`. We use the `nocloud` format for Linux, and `configdrive2` for windows.',
- enum => ['configdrive2', 'nocloud'],
+ # Selects one of the formats listed in 'enum'; the description states how the default
+ # is derived from `ostype`.
+ description => 'Specifies the cloud-init configuration format. The default depends on the'
+ .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
+ .' and `configdrive2` for windows.',
+ enum => ['configdrive2', 'nocloud', 'opennebula'],
},
ciuser => {
optional => 1,
type => 'string',
- description => "cloud-init: User name to change ssh keys and password for instead of the image's configured default user.",
+ description => "cloud-init: User name to change ssh keys and password for instead of the"
+ ." image's configured default user.",
},
cipassword => {
optional => 1,
type => 'string',
- description => 'cloud-init: Password to assign the user. Using this is generally not recommended. Use ssh keys instead. Also note that older cloud-init versions do not support hashed passwords.',
+ description => 'cloud-init: Password to assign the user. Using this is generally not'
+ .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
+ .' support hashed passwords.',
+ },
+ ciupgrade => {
+ optional => 1,
+ type => 'boolean',
+ description => 'cloud-init: do an automatic package upgrade after the first boot.',
+ default => 1,
},
cicustom => {
optional => 1,
type => 'string',
- description => 'cloud-init: Specify custom files to replace the automatically generated ones at start.',
+ description => 'cloud-init: Specify custom files to replace the automatically generated'
+ .' ones at start.',
format => 'pve-qm-cicustom',
},
searchdomain => {
optional => 1,
type => 'string',
- description => "cloud-init: Sets DNS search domains for a container. Create will automatically use the setting from the host if neither searchdomain nor nameserver are set.",
+ description => 'cloud-init: Sets DNS search domains for a container. Create will'
+ .' automatically use the setting from the host if neither searchdomain nor nameserver'
+ .' are set.',
},
nameserver => {
optional => 1,
type => 'string', format => 'address-list',
- description => "cloud-init: Sets DNS server IP address for a container. Create will automatically use the setting from the host if neither searchdomain nor nameserver are set.",
+ description => 'cloud-init: Sets DNS server IP address for a container. Create will'
+ .' automatically use the setting from the host if neither searchdomain nor nameserver'
+ .' are set.',
},
sshkeys => {
optional => 1,
PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
}
-my $MAX_USB_DEVICES = 5;
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
-my $MAX_NUMA = 8;
-
-my $numa_fmt = {
- cpus => {
- type => "string",
- pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
- description => "CPUs accessing this NUMA node.",
- format_description => "id[-id];...",
- },
- memory => {
- type => "number",
- description => "Amount of memory this NUMA node provides.",
- optional => 1,
- },
- hostnodes => {
- type => "string",
- pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
- description => "Host NUMA nodes to use.",
- format_description => "id[-id];...",
- optional => 1,
- },
- policy => {
- type => 'string',
- enum => [qw(preferred bind interleave)],
- description => "NUMA allocation policy.",
- optional => 1,
- },
-};
-PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
-my $numadesc = {
- optional => 1,
- type => 'string', format => $numa_fmt,
- description => "NUMA topology.",
-};
-PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
-
-for (my $i = 0; $i < $MAX_NUMA; $i++) {
- $confdesc->{"numa$i"} = $numadesc;
-}
-my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
- 'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3',
- 'e1000-82540em', 'e1000-82544gc', 'e1000-82545em'];
+for (my $i = 0; $i < $PVE::QemuServer::Memory::MAX_NUMA; $i++) {
+ $confdesc->{"numa$i"} = $PVE::QemuServer::Memory::numadesc;
+}
+
+my $nic_model_list = [
+ 'e1000',
+ 'e1000-82540em',
+ 'e1000-82544gc',
+ 'e1000-82545em',
+ 'e1000e',
+ 'i82551',
+ 'i82557b',
+ 'i82559er',
+ 'ne2k_isa',
+ 'ne2k_pci',
+ 'pcnet',
+ 'rtl8139',
+ 'virtio',
+ 'vmxnet3',
+];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
my $net_fmt_bridge_descr = <<__EOD__;
my $net_fmt = {
macaddr => get_standard_option('mac-addr', {
- description => "MAC address. That address must be unique withing your network. This is automatically generated if not specified.",
+ # Overrides only the description of the shared 'mac-addr' standard option.
+ description => "MAC address. That address must be unique within your network. This is"
+ ." automatically generated if not specified.",
}),
model => {
type => 'string',
- description => "Network Card Model. The 'virtio' model provides the best performance with very low CPU overhead. If your guest does not support this driver, it is usually best to use 'e1000'.",
+ description => "Network Card Model. The 'virtio' model provides the best performance with"
+ ." very low CPU overhead. If your guest does not support this driver, it is usually"
+ ." best to use 'e1000'.",
enum => $nic_model_list,
default_key => 1,
},
(map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
- bridge => {
- type => 'string',
+ bridge => get_standard_option('pve-bridge-id', {
description => $net_fmt_bridge_descr,
- format_description => 'bridge',
- pattern => '[-_.\w\d]+',
optional => 1,
- },
+ }),
queues => {
type => 'integer',
- minimum => 0, maximum => 16,
+ minimum => 0, maximum => 64,
description => 'Number of packet queues to be used on the device.',
optional => 1,
},
IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.
-The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit gateway should be provided.
-For IPv6 the special string 'auto' can be used to use stateless autoconfiguration.
+The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
+gateway should be provided.
+For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
+cloud-init 19.4 or newer.
-If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using dhcp on IPv4.
+If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
+dhcp on IPv4.
EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $netdesc);
$confdesc->{$key} = $confdesc_cloudinit->{$key};
}
-PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
-sub verify_volume_id_or_qm_path {
- my ($volid, $noerr) = @_;
+PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
+sub pve_verify_cpuset {
+ my ($set_text, $noerr) = @_;
- if ($volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|) {
- return $volid;
- }
+ my ($count, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
- # if its neither 'none' nor 'cdrom' nor a path, check if its a volume-id
- $volid = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
if ($@) {
- return undef if $noerr;
- die $@;
+ return if $noerr;
+ die "unable to parse cpuset option\n";
}
- return $volid;
+
+ return PVE::CpuSet->new($members)->short_string();
}
-my $usb_fmt = {
- host => {
- default_key => 1,
- type => 'string', format => 'pve-qm-usb-device',
- format_description => 'HOSTUSBDEVICE|spice',
- description => <<EODESCR,
-The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:
+PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
+sub verify_volume_id_or_qm_path {
+ my ($volid, $noerr) = @_;
- 'bus-port(.port)*' (decimal numbers) or
- 'vendor_id:product_id' (hexadeciaml numbers) or
- 'spice'
+ return $volid if $volid eq 'none' || $volid eq 'cdrom';
-You can use the 'lsusb -t' command to list existing usb devices.
+ return verify_volume_id_or_absolute_path($volid, $noerr);
+}
-NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
+PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
+sub verify_volume_id_or_absolute_path {
+ my ($volid, $noerr) = @_;
-The value 'spice' can be used to add a usb redirection devices for spice.
-EODESCR
- },
- usb3 => {
- optional => 1,
- type => 'boolean',
- description => "Specifies whether if given host option is a USB3 device or port.",
- default => 0,
- },
-};
+ return $volid if $volid =~ m|^/|;
-my $usbdesc = {
- optional => 1,
- type => 'string', format => $usb_fmt,
- description => "Configure an USB device (n is 0 to 4).",
-};
-PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
+ $volid = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
+ if ($@) {
+ return if $noerr;
+ die $@;
+ }
+ return $volid;
+}
my $serialdesc = {
optional => 1,
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).
-NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines - use with special care.
+NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
+use with special care.
CAUTION: Experimental! User reported problems with this option.
EODESCR
verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).
-NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such machines - use with special care.
+NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
+machines - use with special care.
CAUTION: Experimental! User reported problems with this option.
EODESCR
$confdesc->{$key} = $PVE::QemuServer::Drive::drivedesc_hash->{$key};
}
-for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
- $confdesc->{"usb$i"} = $usbdesc;
+for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
+ $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc;
+}
+
+# Property-string schema of the 'boot' option; registered below under the
+# format name 'pve-qm-boot'. It offers two optional keys: the deprecated
+# 'legacy' letter list and the explicit 'order' device list.
+my $boot_fmt = {
+ # deprecated single-letter boot selection; marked as the default key
+ legacy => {
+ optional => 1,
+ default_key => 1,
+ type => 'string',
+ description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
+ . " Deprecated, use 'order=' instead.",
+ pattern => '[acdn]{1,4}',
+ format_description => "[acdn]{1,4}",
+
+ # note: this is also the fallback if boot: is not given at all
+ default => 'cdn',
+ },
+ # explicit device list; the value uses the 'pve-qm-bootdev-list' format
+ order => {
+ optional => 1,
+ type => 'string',
+ format => 'pve-qm-bootdev-list',
+ format_description => "device[;device...]",
+ description => <<EODESC,
+The guest will attempt to boot from devices in the order they appear here.
+
+Disks, optical drives and passed-through storage USB devices will be directly
+booted from, NICs will load PXE, and PCIe devices will either behave like disks
+(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).
+
+Note that only devices in this list will be marked as bootable and thus loaded
+by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
+(e.g. software-raid), you need to specify all of them here.
+
+Overrides the deprecated 'legacy=[acdn]*' value when given.
+EODESC
+ },
+};
+PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
+
+PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
+# Format verifier for a single boot device name ('pve-qm-bootdev').
+#
+# $dev   - device key to check, e.g. a drive name or netN/usbN/hostpciN
+# $noerr - when true, return nothing instead of dying on an invalid device
+#
+# Returns $dev when it is acceptable; otherwise returns nothing (with $noerr)
+# or dies with "invalid boot device".
+sub verify_bootdev {
+    my ($dev, $noerr) = @_;
+
+    # efidisk*/tpmstate* are not accepted through the drive-name check below
+    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
+    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
+        return $dev;
+    }
+
+    # netN, usbN and hostpciN only count if such a key exists in $confdesc
+    for my $prefix (qw(net usb hostpci)) {
+        next if $dev !~ m/^$prefix\d+$/;
+        return $dev if $confdesc->{$dev};
+    }
+
+    return if $noerr;
+    die "invalid boot device '$dev'\n";
+}
+
+# Serialise a list of boot devices into a 'boot' property string.
+#
+# $devs - array reference of device names, in boot order
+#
+# Returns "" for an empty list, otherwise the property string produced by
+# print_property_string() for { order => 'dev1;dev2;...' } using $boot_fmt.
+sub print_bootorder {
+    my ($devs) = @_;
+
+    return "" if scalar(@$devs) == 0;
+
+    my $order = join(';', @$devs);
+    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
 }
my $kvm_api_version = 0;
sub kvm_version {
return $kvm_api_version if $kvm_api_version;
- open my $fh, '<', '/dev/kvm'
- or return undef;
+ open my $fh, '<', '/dev/kvm' or return;
# 0xae00 => KVM_GET_API_VERSION
$kvm_api_version = ioctl($fh, 0xae00, 0);
+ close($fh);
return $kvm_api_version;
}
return $kvm_user_version->{$binary};
}
+# File-local wrapper around PVE::QemuServer::Machine::extract_version().
+#
+# $machine_type - machine type string handed through unchanged
+# $version      - optional version; when undefined the result of
+#                 kvm_user_version() is used instead
+my sub extract_version {
+    my ($machine_type, $version) = @_;
+
+    # only query kvm_user_version() when the caller passed no version
+    $version //= kvm_user_version();
+
+    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
+}
sub kernel_has_vhost_net {
return -c '/dev/vhost-net';
my $cdrom_path;
sub get_cdrom_path {
- return $cdrom_path if $cdrom_path;
+ return $cdrom_path if defined($cdrom_path);
- return $cdrom_path = "/dev/cdrom" if -l "/dev/cdrom";
- return $cdrom_path = "/dev/cdrom1" if -l "/dev/cdrom1";
- return $cdrom_path = "/dev/cdrom2" if -l "/dev/cdrom2";
+ $cdrom_path = first { -l $_ } map { "/dev/cdrom$_" } ('', '1', '2');
+
+ if (!defined($cdrom_path)) {
+ log_warn("no physical CD-ROM available, ignoring");
+ $cdrom_path = '';
+ }
+
+ return $cdrom_path;
}
sub get_iso_path {
if (!($file eq 'none' || $file eq 'cdrom' ||
$file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/)) {
- return undef if $file =~ m|/|;
+ return if $file =~ m|/|;
if ($media && $media eq 'cdrom') {
$file = "local:iso/$file";
($drive->{file} !~ m/^([^:]+):(.+)$/) &&
($drive->{file} !~ m/^\d+$/)) {
my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
- raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"}) if !$vtype;
+ raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"})
+ if !$vtype;
$drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
verify_media_type($opt, $vtype, $drive->{media});
$drive->{file} = $volid;
$data = $confdesc->{hotplug}->{default} if $data eq '1';
foreach my $feature (PVE::Tools::split_list($data)) {
- if ($feature =~ m/^(network|disk|cpu|memory|usb)$/) {
+ if ($feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/) {
$res->{$1} = 1;
} else {
die "invalid hotplug feature '$feature'\n";
return $value if parse_hotplug_features($value);
- return undef if $noerr;
+ return if $noerr;
die "unable to parse hotplug option\n";
}
-sub scsi_inquiry {
- my($fh, $noerr) = @_;
-
- my $SG_IO = 0x2285;
- my $SG_GET_VERSION_NUM = 0x2282;
-
- my $versionbuf = "\x00" x 8;
- my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
- if (!$ret) {
- die "scsi ioctl SG_GET_VERSION_NUM failoed - $!\n" if !$noerr;
- return undef;
- }
- my $version = unpack("I", $versionbuf);
- if ($version < 30000) {
- die "scsi generic interface too old\n" if !$noerr;
- return undef;
- }
-
- my $buf = "\x00" x 36;
- my $sensebuf = "\x00" x 8;
- my $cmd = pack("C x3 C x1", 0x12, 36);
-
- # see /usr/include/scsi/sg.h
- my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";
-
- my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
- length($sensebuf), 0, length($buf), $buf,
- $cmd, $sensebuf, 6000);
+sub assert_clipboard_config {
+ my ($vga) = @_;
- $ret = ioctl($fh, $SG_IO, $packet);
- if (!$ret) {
- die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
- return undef;
- }
+ my $clipboard_regex = qr/^(std|cirrus|vmware|virtio|qxl)/;
- my @res = unpack($sg_io_hdr_t, $packet);
- if ($res[17] || $res[18]) {
- die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
- return undef;
+ if (
+ $vga->{'clipboard'}
+ && $vga->{'clipboard'} eq 'vnc'
+ && $vga->{type}
+ && $vga->{type} !~ $clipboard_regex
+ ) {
+ die "vga type $vga->{type} is not compatible with VNC clipboard\n";
}
-
- my $res = {};
- (my $byte0, my $byte1, $res->{vendor},
- $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);
-
- $res->{removable} = $byte1 & 128 ? 1 : 0;
- $res->{type} = $byte0 & 31;
-
- return $res;
-}
-
-sub path_is_scsi {
- my ($path) = @_;
-
- my $fh = IO::File->new("+<$path") || return undef;
- my $res = scsi_inquiry($fh, 1);
- close($fh);
-
- return $res;
}
sub print_tabletdevice_full {
# we use uhci for old VMs because tablet driver was buggy in older qemu
my $usbbus;
- if (PVE::QemuServer::Machine::machine_type_is_q35($conf) || $arch eq 'aarch64') {
+ if ($q35 || $arch eq 'aarch64') {
$usbbus = 'ehci';
} else {
$usbbus = 'uhci';
}
sub print_keyboarddevice_full {
- my ($conf, $arch, $machine) = @_;
+ my ($conf, $arch) = @_;
- return undef if $arch ne 'aarch64';
+ return if $arch ne 'aarch64';
return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
}
+# Build the drive identifier used in device and drive IDs: the interface
+# name directly followed by the index (e.g. interface 'scsi', index 3 gives
+# 'scsi3').
+my sub get_drive_id {
+    my ($drive) = @_;
+    return $drive->{interface} . $drive->{index};
+}
+
sub print_drivedevice_full {
my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;
my $device = '';
my $maxdev = 0;
+ my $drive_id = get_drive_id($drive);
if ($drive->{interface} eq 'virtio') {
- my $pciaddr = print_pci_addr("$drive->{interface}$drive->{index}", $bridges, $arch, $machine_type);
- $device = "virtio-blk-pci,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}$pciaddr";
- $device .= ",iothread=iothread-$drive->{interface}$drive->{index}" if $drive->{iothread};
+ my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
+ $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
+ $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
} elsif ($drive->{interface} eq 'scsi') {
my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
my $unit = $drive->{index} % $maxdev;
- my $devicetype = 'hd';
- my $path = '';
- if (drive_is_cdrom($drive)) {
- $devicetype = 'cd';
- } else {
- if ($drive->{file} =~ m|^/|) {
- $path = $drive->{file};
- if (my $info = path_is_scsi($path)) {
- if ($info->{type} == 0 && $drive->{scsiblock}) {
- $devicetype = 'block';
- } elsif ($info->{type} == 1) { # tape
- $devicetype = 'generic';
- }
- }
- } else {
- $path = PVE::Storage::path($storecfg, $drive->{file});
- }
- # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
- my $version = PVE::QemuServer::Machine::extract_version($machine_type, kvm_user_version());
- if ($path =~ m/^iscsi\:\/\// &&
- !min_version($version, 4, 1)) {
- $devicetype = 'generic';
- }
- }
+ my $machine_version = extract_version($machine_type, kvm_user_version());
+ my $device_type = PVE::QemuServer::Drive::get_scsi_device_type(
+ $drive, $storecfg, $machine_version);
- if (!$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/)){
- $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}";
+ if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
+ $device = "scsi-$device_type,bus=$controller_prefix$controller.0,scsi-id=$unit";
} else {
- $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0,lun=$drive->{index},drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}";
+ $device = "scsi-$device_type,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
+ .",lun=$drive->{index}";
}
+ $device .= ",drive=drive-$drive_id,id=$drive_id";
- if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
+ if ($drive->{ssd} && ($device_type eq 'block' || $device_type eq 'hd')) {
$device .= ",rotation_rate=1";
}
$device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
+ # only scsi-hd and scsi-cd support passing vendor and product information
+ if ($device_type eq 'hd' || $device_type eq 'cd') {
+ if (my $vendor = $drive->{vendor}) {
+ $device .= ",vendor=$vendor";
+ }
+ if (my $product = $drive->{product}) {
+ $device .= ",product=$product";
+ }
+ }
+
} elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
my $controller = int($drive->{index} / $maxdev);
my $unit = $drive->{index} % $maxdev;
- my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";
- $device = "ide-$devicetype";
+ # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
+ # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
+ # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
+ # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
+ # were before. Move odd ones up by 2 where they don't clash.
+ if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') {
+ $controller += 2 * ($unit % 2);
+ $unit = 0;
+ }
+
+ my $device_type = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";
+
+ $device = "ide-$device_type";
if ($drive->{interface} eq 'ide') {
$device .= ",bus=ide.$controller,unit=$unit";
} else {
$device .= ",bus=ahci$controller.$unit";
}
- $device .= ",drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}";
+ $device .= ",drive=drive-$drive_id,id=$drive_id";
- if ($devicetype eq 'hd') {
+ if ($device_type eq 'hd') {
if (my $model = $drive->{model}) {
$model = URI::Escape::uri_unescape($model);
$device .= ",model=$model";
sub get_initiator_name {
my $initiator;
- my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return undef;
+ my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;
while (defined(my $line = <$fh>)) {
next if $line !~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/;
$initiator = $1;
return $initiator;
}
+# Decide whether io_uring may be chosen as the default aio mode for a drive
+# on the given storage.
+#
+# $scfg         - storage configuration hash (may be undefined)
+# $cache_direct - true when the drive uses a direct (uncached) cache mode
+#
+# Returns 1 when io_uring is allowed, nothing (empty list / undef) otherwise.
+my sub storage_allows_io_uring_default {
+    my ($scfg, $cache_direct) = @_;
+
+    # without a storage config none of the exclusions below can apply
+    return 1 if !$scfg;
+
+    my $type = $scfg->{type};
+
+    # io_uring with cache mode writeback or writethrough on krbd will hang...
+    return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;
+
+    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
+    # sometimes, just plain disable...
+    return if $type eq 'lvm';
+
+    # io_uring causes problems when used with CIFS since kernel 5.15
+    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
+    return if $type eq 'cifs';
+
+    return 1;
+}
+
+# Determine whether a drive ends up with a direct cache mode.
+#
+# $drive - parsed drive hash; an explicit {cache} setting always wins
+# $scfg  - storage configuration hash (may be undefined)
+#
+# With an explicit cache mode the result is true only for off, none and
+# directsync. Without one, the result is 1 unless the drive is a CD-ROM or
+# lives on a btrfs storage that does not have 'nocow' set.
+my sub drive_uses_cache_direct {
+    my ($drive, $scfg) = @_;
+
+    if (my $cache = $drive->{cache}) {
+        # keep the match in scalar context so the caller gets one value
+        my $is_direct = $cache =~ /^(?:off|none|directsync)$/;
+        return $is_direct;
+    }
+
+    return 0 if drive_is_cdrom($drive);
+
+    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
+    return $btrfs_with_cow ? 0 : 1;
+}
+
sub print_drive_commandline_full {
- my ($storecfg, $vmid, $drive) = @_;
+ my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_;
my $path;
my $volid = $drive->{file};
- my $format;
+ my $format = $drive->{format};
+ my $drive_id = get_drive_id($drive);
+
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+ my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
if (drive_is_cdrom($drive)) {
$path = get_iso_path($storecfg, $vmid, $volid);
+ die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
} else {
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
if ($storeid) {
$path = PVE::Storage::path($storecfg, $volid);
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
+ $format //= qemu_img_format($scfg, $volname);
} else {
$path = $volid;
- $format = "raw";
+ $format //= "raw";
}
}
+ my $is_rbd = $path =~ m/^rbd:/;
+
my $opts = '';
- my @qemu_drive_options = qw(heads secs cyls trans media format cache rerror werror aio discard);
+ my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
foreach my $o (@qemu_drive_options) {
$opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
}
$opts .= ",snapshot=$v";
}
+ if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
+ $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
+ }
+
foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
my ($dir, $qmpname) = @$type;
if (my $v = $drive->{"mbps$dir"}) {
}
}
- $opts .= ",format=$format" if $format && !$drive->{format};
+ if ($live_restore_name) {
+ $format = "rbd" if $is_rbd;
+ die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
+ if !$format;
+ $opts .= ",format=alloc-track,file.driver=$format";
+ } elsif ($format) {
+ $opts .= ",format=$format";
+ }
- my $cache_direct = 0;
+ my $cache_direct = drive_uses_cache_direct($drive, $scfg);
- if (my $cache = $drive->{cache}) {
- $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
- } elsif (!drive_is_cdrom($drive)) {
- $opts .= ",cache=none";
- $cache_direct = 1;
- }
+ $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;
- # aio native works only with O_DIRECT
if (!$drive->{aio}) {
- if($cache_direct) {
- $opts .= ",aio=native";
+ if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
+ # io_uring supports all cache modes
+ $opts .= ",aio=io_uring";
} else {
- $opts .= ",aio=threads";
+ # aio native works only with O_DIRECT
+ if($cache_direct) {
+ $opts .= ",aio=native";
+ } else {
+ $opts .= ",aio=threads";
+ }
}
}
# This used to be our default with discard not being specified:
$detectzeroes = 'on';
}
- $opts .= ",detect-zeroes=$detectzeroes" if $detectzeroes;
+
+ # note: 'detect-zeroes' works per blockdev and we want it to persist
+ # after the alloc-track is removed, so put it on 'file' directly
+ my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes";
+ $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
+ }
+
+ if ($live_restore_name) {
+ $opts .= ",backing=$live_restore_name";
+ $opts .= ",auto-remove=on";
}
- my $pathinfo = $path ? "file=$path," : '';
+ # my $file_param = $live_restore_name ? "file.file.filename" : "file";
+ my $file_param = "file";
+ if ($live_restore_name) {
+ # non-rbd drivers require the underlying file to be a separate block
+ # node, so add a second .file indirection
+ $file_param .= ".file" if !$is_rbd;
+ $file_param .= ".filename";
+ }
+ my $pathinfo = $path ? "$file_param=$path," : '';
return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
-sub print_netdevice_full {
- my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;
+# Assemble the option string for a read-only 'pbs' block device.
+#
+# $pbs_conf - hash with repository, snapshot and archive, plus the optional
+#             keys namespace and keyfile (added only when set to a true value)
+# $pbs_name - value used for node-name
+#
+# Returns the comma separated option string.
+sub print_pbs_blockdev {
+    my ($pbs_conf, $pbs_name) = @_;
+
+    my @options = (
+        "driver=pbs",
+        "node-name=$pbs_name",
+        "read-only=on",
+        "repository=$pbs_conf->{repository}",
+    );
+    push @options, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
+    push @options, "snapshot=$pbs_conf->{snapshot}";
+    push @options, "archive=$pbs_conf->{archive}";
+    push @options, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};
+
+    return join(',', @options);
+}
- my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};
+sub print_netdevice_full {
+ my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;
my $device = $net->{model};
if ($net->{model} eq 'virtio') {
my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";
if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio'){
- #Consider we have N queues, the number of vectors needed is 2*N + 2 (plus one config interrupt and control vq)
+ # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
+ # and out of each queue plus one config interrupt and control vector queue
my $vectors = $net->{queues} * 2 + 2;
$tmpstr .= ",vectors=$vectors,mq=on";
+ if (min_version($machine_version, 7, 1)) {
+ $tmpstr .= ",packed=on";
+ }
+ }
+
+ if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio'){
+ $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256";
}
+
$tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ;
if (my $mtu = $net->{mtu}) {
$romfile = 'pxe-virtio.rom';
} elsif ($device eq 'e1000') {
$romfile = 'pxe-e1000.rom';
+ } elsif ($device eq 'e1000e') {
+ $romfile = 'pxe-e1000e.rom';
} elsif ($device eq 'ne2k') {
$romfile = 'pxe-ne2k_pci.rom';
} elsif ($device eq 'pcnet') {
if length($ifname) >= 16;
my $vhostparam = '';
- if (is_native($arch)) {
+ if (is_native_arch($arch)) {
$vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
}
my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";
if ($net->{bridge}) {
- $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
+ $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
+ .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
} else {
$netdev = "type=user,id=$netid,hostname=$vmname";
}
'std' => 'VGA',
'vmware' => 'vmware-svga',
'virtio' => 'virtio-vga',
+ 'virtio-gl' => 'virtio-vga-gl',
};
sub print_vga_device {
}
}
- die "no devicetype for $vga->{type}\n" if !$type;
+ die "no device-type for $vga->{type}\n" if !$type;
my $memory = "";
if ($vgamem_mb) {
- if ($vga->{type} eq 'virtio') {
+ if ($vga->{type} =~ /^virtio/) {
my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
$memory = ",max_hostmem=$bytes";
} elsif ($qxlnum) {
$memory = ",ram_size=67108864,vram_size=33554432";
}
+ my $edidoff = "";
+ if ($type eq 'VGA' && windows_version($conf->{ostype})) {
+ $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
+ }
+
my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
my $vgaid = "vga" . ($id // '');
my $pciaddr;
-
if ($q35 && $vgaid eq 'vga') {
# the first display uses pcie.0 bus on q35 machines
$pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
$pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
}
- return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}";
-}
+ if ($vga->{type} eq 'virtio-gl') {
+ my $base = '/usr/lib/x86_64-linux-gnu/lib';
+ die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
+ if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";
-sub parse_number_sets {
- my ($set) = @_;
- my $res = [];
- foreach my $part (split(/;/, $set)) {
- if ($part =~ /^\s*(\d+)(?:-(\d+))?\s*$/) {
- die "invalid range: $part ($2 < $1)\n" if defined($2) && $2 < $1;
- push @$res, [ $1, $2 ];
- } else {
- die "invalid range: $part\n";
- }
+ die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
+ if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
}
- return $res;
-}
-
-sub parse_numa {
- my ($data) = @_;
- my $res = PVE::JSONSchema::parse_property_string($numa_fmt, $data);
- $res->{cpus} = parse_number_sets($res->{cpus}) if defined($res->{cpus});
- $res->{hostnodes} = parse_number_sets($res->{hostnodes}) if defined($res->{hostnodes});
- return $res;
+ return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
sub parse_net {
- my ($data) = @_;
+ my ($data, $disable_mac_autogen) = @_;
- my $res = eval { PVE::JSONSchema::parse_property_string($net_fmt, $data) };
+ my $res = eval { parse_property_string($net_fmt, $data) };
if ($@) {
warn $@;
- return undef;
+ return;
}
- if (!defined($res->{macaddr})) {
+ if (!defined($res->{macaddr}) && !$disable_mac_autogen) {
my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
$res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
}
sub parse_ipconfig {
my ($data) = @_;
- my $res = eval { PVE::JSONSchema::parse_property_string($ipconfig_fmt, $data) };
+ my $res = eval { parse_property_string($ipconfig_fmt, $data) };
if ($@) {
warn $@;
- return undef;
+ return;
}
if ($res->{gw} && !$res->{ip}) {
warn 'gateway specified without specifying an IP address';
- return undef;
+ return;
}
if ($res->{gw6} && !$res->{ip6}) {
warn 'IPv6 gateway specified without specifying an IPv6 address';
- return undef;
+ return;
}
if ($res->{gw} && $res->{ip} eq 'dhcp') {
warn 'gateway specified together with DHCP';
- return undef;
+ return;
}
if ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
# gw6 + auto/dhcp
warn "IPv6 gateway specified together with $res->{ip6} address";
- return undef;
+ return;
}
if (!$res->{ip} && !$res->{ip6}) {
}
}
- return undef;
+ return;
}
sub vmconfig_register_unused_drive {
if (drive_is_cloudinit($drive)) {
eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
warn $@ if $@;
+ delete $conf->{cloudinit};
} elsif (!drive_is_cdrom($drive)) {
my $volid = $drive->{file};
if (vm_is_volid_owner($storecfg, $vmid, $volid)) {
sub parse_smbios1 {
my ($data) = @_;
- my $res = eval { PVE::JSONSchema::parse_property_string($smbios1_fmt, $data) };
+ my $res = eval { parse_property_string($smbios1_fmt, $data) };
warn $@ if $@;
return $res;
}
sub parse_watchdog {
my ($value) = @_;
- return undef if !$value;
+ return if !$value;
- my $res = eval { PVE::JSONSchema::parse_property_string($watchdog_fmt, $value) };
+ my $res = eval { parse_property_string($watchdog_fmt, $value) };
warn $@ if $@;
return $res;
}
sub parse_guest_agent {
- my ($value) = @_;
+ my ($conf) = @_;
- return {} if !defined($value->{agent});
+ return {} if !defined($conf->{agent});
- my $res = eval { PVE::JSONSchema::parse_property_string($agent_fmt, $value->{agent}) };
+ my $res = eval { parse_property_string($agent_fmt, $conf->{agent}) };
warn $@ if $@;
# if the agent is disabled ignore the other potentially set properties
return $res;
}
-sub parse_vga {
+# Look up one property of the guest agent configuration.
+#
+# $conf - VM config hash; its 'agent' entry is parsed via parse_guest_agent()
+# $key  - name of the agent property to return
+#
+# Returns the property value, or undef when no 'agent' option is set.
+sub get_qga_key {
+    my ($conf, $key) = @_;
+
+    if (defined($conf->{agent})) {
+        my $agent_opts = parse_guest_agent($conf);
+        return $agent_opts->{$key};
+    }
+
+    # explicit undef on purpose: callers get exactly one value in list context
+    return undef;
+}
+
+sub parse_vga {
my ($value) = @_;
return {} if !$value;
- my $res = eval { PVE::JSONSchema::parse_property_string($vga_fmt, $value) };
+ my $res = eval { parse_property_string($vga_fmt, $value) };
warn $@ if $@;
return $res;
}
sub parse_rng {
my ($value) = @_;
- return undef if !$value;
+ return if !$value;
- my $res = eval { PVE::JSONSchema::parse_property_string($rng_fmt, $value) };
+ my $res = eval { parse_property_string($rng_fmt, $value) };
warn $@ if $@;
return $res;
}
-PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
-sub verify_usb_device {
- my ($value, $noerr) = @_;
+# Parse the 'meta' property string of a VM config using $meta_info_fmt.
+#
+# Returns nothing for an empty/false value. Parse errors are only warned
+# about, in which case the returned value is undefined.
+sub parse_meta_info {
+    my ($value) = @_;
+
+    return if !$value;
+
+    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
+    if (my $err = $@) {
+        warn $err;
+    }
+
+    return $meta;
+}
+
+sub new_meta_info_string {
+ my () = @_; # for now do not allow to override any value
- return $value if parse_usb_device($value);
+ return PVE::JSONSchema::print_property_string(
+ {
+ 'creation-qemu' => kvm_user_version(),
+ ctime => "". int(time()),
+ },
+ $meta_info_fmt
+ );
+}
- return undef if $noerr;
+sub qemu_created_version_fixups {
+ my ($conf, $forcemachine, $kvmver) = @_;
- die "unable to parse usb device\n";
+ my $meta = parse_meta_info($conf->{meta}) // {};
+ my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);
+
+ # check if we need to apply some handling for VMs that always use the latest machine version but
+ # had a machine version transition happen that affected HW such that, e.g., an OS config change
+ # would be required (we do not want to pin machine version for non-windows OS type)
+ my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
+ if (
+ (!defined($machine_conf->{type}) || $machine_conf->{type} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
+ && (!defined($meta->{'creation-qemu'}) || !min_version($meta->{'creation-qemu'}, 6, 1)) # created before 6.1
+ && (!$forced_vers || min_version($forced_vers, 6, 1)) # handle snapshot-rollback/migrations
+ && min_version($kvmver, 6, 1) # only need to apply the change since 6.1
+ ) {
+ my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
+ if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
+ # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
+ # and thus with the predictable interface naming of systemd
+ return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
+ }
+ }
+ return;
}
# add JSON properties for create and set function
sub json_config_properties {
- my $prop = shift;
+ my ($prop, $with_disk_alloc) = @_;
+
+ my $skip_json_config_opts = {
+ parent => 1,
+ snaptime => 1,
+ vmstate => 1,
+ runningmachine => 1,
+ runningcpu => 1,
+ meta => 1,
+ };
foreach my $opt (keys %$confdesc) {
- next if $opt eq 'parent' || $opt eq 'snaptime' || $opt eq 'vmstate' ||
- $opt eq 'runningmachine' || $opt eq 'runningcpu';
- $prop->{$opt} = $confdesc->{$opt};
+ next if $skip_json_config_opts->{$opt};
+
+ if ($with_disk_alloc && is_valid_drivename($opt)) {
+ $prop->{$opt} = $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt};
+ } else {
+ $prop->{$opt} = $confdesc->{$opt};
+ }
+ }
+
+ return $prop;
+}
+
+# Properties that we can read from an OVF file
+#
+# Builds and returns a fresh hash of JSON schema properties: one optional
+# string property per valid drive name, plus the optional properties
+# 'cores', 'memory' and 'name'.
+sub json_ovf_properties {
+ my $prop = {};
+
+ # one optional entry per drive name, accepting a volume ID or an absolute path
+ for my $device (PVE::QemuServer::Drive::valid_drive_names()) {
+ $prop->{$device} = {
+ type => 'string',
+ format => 'pve-volume-id-or-absolute-path',
+ description => "Disk image that gets imported to $device",
+ optional => 1,
+ };
 }
+ # scalar VM settings, all optional
+ $prop->{cores} = {
+ type => 'integer',
+ description => "The number of CPU cores.",
+ optional => 1,
+ };
+ $prop->{memory} = {
+ type => 'integer',
+ description => "Amount of RAM for the VM in MB.",
+ optional => 1,
+ };
+ $prop->{name} = {
+ type => 'string',
+ description => "Name of the VM.",
+ optional => 1,
+ };
+
 return $prop;
 }
return dclone($confdesc_cloudinit);
}
+# Returns a hash ref used as a set: every key of $confdesc_cloudinit, 'name' and
+# 'net0' .. 'net<$MAX_NETS-1>' is mapped to 1.
+sub cloudinit_pending_properties {
+    my $p = {
+        # The parentheses are required: without them the map's argument list also swallows the
+        # following `name => 1` pair, which adds a bogus key '1' to the set.
+        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
+        name => 1,
+    };
+    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
+    return $p;
+}
+
sub check_type {
my ($key, $value) = @_;
}
sub destroy_vm {
- my ($storecfg, $vmid, $skiplock, $replacement_conf) = @_;
+ my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;
my $conf = PVE::QemuConfig->load_config($vmid);
- PVE::QemuConfig->check_lock($conf) if !$skiplock;
+ if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
+ PVE::QemuConfig->check_lock($conf);
+ }
if ($conf->{template}) {
# check if any base image is still used by a linked clone
- PVE::QemuConfig->foreach_volume($conf, sub {
+ PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive);
});
}
- # only remove disks owned by this VM
- PVE::QemuConfig->foreach_volume($conf, sub {
+ my $volids = {};
+ my $remove_owned_drive = sub {
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive, 1);
my $volid = $drive->{file};
return if !$volid || $volid =~ m|^/|;
+ return if $volids->{$volid};
my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
return if !$path || !$owner || ($owner != $vmid);
+ $volids->{$volid} = 1;
eval { PVE::Storage::vdisk_free($storecfg, $volid) };
warn "Could not remove disk '$volid', check manually: $@" if $@;
- });
+ };
- # also remove unused disk
- my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid);
- PVE::Storage::foreach_volid($vmdisks, sub {
- my ($volid, $sid, $volname, $d) = @_;
- eval { PVE::Storage::vdisk_free($storecfg, $volid) };
- warn $@ if $@;
- });
+ # only remove disks owned by this VM (referenced in the config)
+ my $include_opts = {
+ include_unused => 1,
+ extra_keys => ['vmstate'],
+ };
+ PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);
+
+ for my $snap (values %{$conf->{snapshots}}) {
+ next if !defined($snap->{vmstate});
+ my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
+ next if !defined($drive);
+ $remove_owned_drive->('vmstate', $drive);
+ }
+
+ PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);
+
+ if ($purge_unreferenced) { # also remove unreferenced disk
+ my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
+ PVE::Storage::foreach_volid($vmdisks, sub {
+ my ($volid, $sid, $volname, $d) = @_;
+ eval { PVE::Storage::vdisk_free($storecfg, $volid) };
+ warn $@ if $@;
+ });
+ }
+
+ eval { delete_ifaces_ipams_ips($conf, $vmid)};
+ warn $@ if $@;
if (defined $replacement_conf) {
PVE::QemuConfig->write_config($vmid, $replacement_conf);
}
sub parse_vm_config {
- my ($filename, $raw) = @_;
+ my ($filename, $raw, $strict) = @_;
- return undef if !defined($raw);
+ return if !defined($raw);
my $res = {
digest => Digest::SHA::sha1_hex($raw),
snapshots => {},
pending => {},
+ cloudinit => {},
+ };
+
+ my $handle_error = sub {
+ my ($msg) = @_;
+
+ if ($strict) {
+ die $msg;
+ } else {
+ warn $msg;
+ }
};
$filename =~ m|/qemu-server/(\d+)\.conf$|
my $conf = $res;
my $descr;
+ my $finish_description = sub {
+ if (defined($descr)) {
+ $descr =~ s/\s+$//;
+ $conf->{description} = $descr;
+ }
+ $descr = undef;
+ };
my $section = '';
my @lines = split(/\n/, $raw);
if ($line =~ m/^\[PENDING\]\s*$/i) {
$section = 'pending';
- if (defined($descr)) {
- $descr =~ s/\s+$//;
- $conf->{description} = $descr;
- }
- $descr = undef;
+ $finish_description->();
+ $conf = $res->{$section} = {};
+ next;
+ } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
+ $section = 'cloudinit';
+ $finish_description->();
$conf = $res->{$section} = {};
next;
} elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
$section = $1;
- if (defined($descr)) {
- $descr =~ s/\s+$//;
- $conf->{description} = $descr;
- }
- $descr = undef;
+ $finish_description->();
$conf = $res->{snapshots}->{$section} = {};
next;
}
- if ($line =~ m/^\#(.*)\s*$/) {
+ if ($line =~ m/^\#(.*)$/) {
$descr = '' if !defined($descr);
$descr .= PVE::Tools::decode_text($1) . "\n";
next;
if ($section eq 'pending') {
$conf->{delete} = $value; # we parse this later
} else {
- warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
+ $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
}
} elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
my $key = $1;
my $value = $2;
+ if ($section eq 'cloudinit') {
+ # ignore validation only used for informative purpose
+ $conf->{$key} = $value;
+ next;
+ }
eval { $value = check_type($key, $value); };
if ($@) {
- warn "vm $vmid - unable to parse value of '$key' - $@";
+ $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
} else {
$key = 'ide2' if $key eq 'cdrom';
my $fmt = $confdesc->{$key}->{format};
$v->{file} = $volid;
$value = print_drive($v);
} else {
- warn "vm $vmid - unable to parse value of '$key'\n";
+ $handle_error->("vm $vmid - unable to parse value of '$key'\n");
next;
}
}
$conf->{$key} = $value;
}
+ } else {
+ $handle_error->("vm $vmid - unable to parse config: $line\n");
}
}
- if (defined($descr)) {
- $descr =~ s/\s+$//;
- $conf->{description} = $descr;
- }
+ $finish_description->();
delete $res->{snapstate}; # just to be sure
return $res;
foreach my $key (keys %$cref) {
next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
- $key eq 'snapstate' || $key eq 'pending';
+ $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
my $value = $cref->{$key};
if ($key eq 'delete') {
die "propertry 'delete' is only allowed in [PENDING]\n"
}
foreach my $key (sort keys %$conf) {
- next if $key eq 'digest' || $key eq 'description' || $key eq 'pending' || $key eq 'snapshots';
+ next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
$raw .= "$key: $conf->{$key}\n";
}
return $raw;
$raw .= &$generate_raw_config($conf->{pending}, 1);
}
+ if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
+ $raw .= "\n[special:cloudinit]\n";
+ $raw .= &$generate_raw_config($conf->{cloudinit});
+ }
+
foreach my $snapname (sort keys %{$conf->{snapshots}}) {
$raw .= "\n[$snapname]\n";
$raw .= &$generate_raw_config($conf->{snapshots}->{$snapname});
my ($conf, $noerr) = @_;
my @loc_res = ();
+ my $mapped_res = [];
+
+ my $nodelist = PVE::Cluster::get_nodelist();
+ my $pci_map = PVE::Mapping::PCI::config();
+ my $usb_map = PVE::Mapping::USB::config();
+
+ my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };
+
+ my $add_missing_mapping = sub {
+ my ($type, $key, $id) = @_;
+ for my $node (@$nodelist) {
+ my $entry;
+ if ($type eq 'pci') {
+ $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
+ } elsif ($type eq 'usb') {
+ $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
+ }
+ if (!scalar($entry->@*)) {
+ push @{$missing_mappings_by_node->{$node}}, $key;
+ }
+ }
+ };
push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax
push @loc_res, "ivshmem" if $conf->{ivshmem};
foreach my $k (keys %$conf) {
- next if $k =~ m/^usb/ && ($conf->{$k} =~ m/^spice(?![^,])/);
+ if ($k =~ m/^usb/) {
+ my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
+ next if $entry->{host} && $entry->{host} =~ m/^spice$/i;
+ if ($entry->{mapping}) {
+ $add_missing_mapping->('usb', $k, $entry->{mapping});
+ push @$mapped_res, $k;
+ }
+ }
+ if ($k =~ m/^hostpci/) {
+ my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
+ if ($entry->{mapping}) {
+ $add_missing_mapping->('pci', $k, $entry->{mapping});
+ push @$mapped_res, $k;
+ }
+ }
# sockets are safe: they will recreated be on the target side post-migrate
next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
die "VM uses local resources\n" if scalar @loc_res && !$noerr;
- return \@loc_res;
+ return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
# check if used storages are available on all nodes (use by migrate)
return if !$sid;
# check if storage is available on both nodes
- my $scfg = PVE::Storage::storage_check_node($storecfg, $sid);
- PVE::Storage::storage_check_node($storecfg, $sid, $node);
+ my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
+ PVE::Storage::storage_check_enabled($storecfg, $sid, $node);
+
+ my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);
+
+ die "$volid: content type '$vtype' is not available on storage '$sid'\n"
+ if !$scfg->{content}->{$vtype};
});
}
+# Returns the result of PVE::QemuServer::Helpers::vm_running_locally($vmid). Unless $nocheck is
+# set, first asserts that the VM config exists on $node.
sub check_running {
my ($vmid, $nocheck, $node) = @_;
+ # $nocheck is set when called during a migration, in which case the config
+ # file might still or already reside on the *other* node
+ # - because rename has already happened, and current node is source
+ # - because rename hasn't happened yet, and current node is target
+ # - because rename has happened, current node is target, but hasn't
+ # processed it yet
PVE::QemuConfig::assert_config_exists_on_node($vmid, $node) if !$nocheck;
return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
our $vmstatus_return_properties = {
vmid => get_standard_option('pve-vmid'),
status => {
- description => "Qemu process status.",
+ description => "QEMU process status.",
type => 'string',
enum => ['stopped', 'running'],
},
optional => 1,
},
qmpstatus => {
- description => "Qemu QMP agent status.",
+ description => "VM run state from the 'query-status' QMP monitor command.",
type => 'string',
optional => 1,
},
type => 'string',
optional => 1,
},
+ 'running-machine' => {
+ description => "The currently running machine type (if running).",
+ type => 'string',
+ optional => 1,
+ },
+ 'running-qemu' => {
+ description => "The currently running QEMU version (if running).",
+ type => 'string',
+ optional => 1,
+ },
};
my $last_proc_pid_stat;
my $conf = PVE::QemuConfig->load_config($vmid);
- my $d = { vmid => $vmid };
- $d->{pid} = $list->{$vmid}->{pid};
+ my $d = { vmid => int($vmid) };
+ $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};
# fixme: better status?
$d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';
$d->{cpus} = $conf->{vcpus} if $conf->{vcpus};
$d->{name} = $conf->{name} || "VM $vmid";
- $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
- : $defaults->{memory}*(1024*1024);
+ $d->{maxmem} = get_current_memory($conf->{memory})*(1024*1024);
if ($conf->{balloon}) {
$d->{balloon_min} = $conf->{balloon}*(1024*1024);
$d->{diskread} = 0;
$d->{diskwrite} = 0;
- $d->{template} = PVE::QemuConfig->is_template($conf);
+ $d->{template} = 1 if PVE::QemuConfig->is_template($conf);
$d->{serial} = 1 if conf_has_serial($conf);
$d->{lock} = $conf->{lock} if $conf->{lock};
$d->{netin} += $netdev->{$dev}->{transmit};
if ($full) {
- $d->{nics}->{$dev}->{netout} = $netdev->{$dev}->{receive};
- $d->{nics}->{$dev}->{netin} = $netdev->{$dev}->{transmit};
+ $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
+ $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
}
}
$res->{$vmid}->{diskwrite} = $totalwrbytes;
};
+ my $machinecb = sub {
+ my ($vmid, $resp) = @_;
+ my $data = $resp->{'return'} || [];
+
+ $res->{$vmid}->{'running-machine'} =
+ PVE::QemuServer::Machine::current_from_query_machines($data);
+ };
+
+ my $versioncb = sub {
+ my ($vmid, $resp) = @_;
+ my $data = $resp->{'return'} // {};
+ my $version = 'unknown';
+
+ if (my $v = $data->{qemu}) {
+ $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
+ }
+
+ $res->{$vmid}->{'running-qemu'} = $version;
+ };
+
my $statuscb = sub {
my ($vmid, $resp) = @_;
$qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
+ $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
+ $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
# this fails if ballon driver is not loaded, so this must be
# the last commnand (following command are aborted if this fails).
$qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');
$qmpclient->queue_execute(undef, 2);
+ foreach my $vmid (keys %$list) {
+ next if $opt_vmid && ($vmid ne $opt_vmid);
+ next if !$res->{$vmid}->{pid}; #not running
+
+ # we can't use the $qmpclient since it might have already aborted on
+ # 'query-balloon', but this might also fail for older versions...
+ my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
+ $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
+ }
+
foreach my $vmid (keys %$list) {
next if $opt_vmid && ($vmid ne $opt_vmid);
$res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
$id //= 0;
my $audio = $conf->{"audio$id"};
- return undef if !defined($audio);
+ return if !defined($audio);
- my $audioproperties = PVE::JSONSchema::parse_property_string($audio_fmt, $audio);
+ my $audioproperties = parse_property_string($audio_fmt, $audio);
my $audiodriver = $audioproperties->{driver} // 'spice';
return {
return $devs;
}
+# Returns a hash ref with the per-VM swtpm file locations: 'socket' and 'pid'.
+sub get_tpm_paths {
+    my ($vmid) = @_;
+
+    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";
+    my $pid_path = "/var/run/qemu-server/$vmid.swtpm.pid";
+
+    return { socket => $socket_path, pid => $pid_path };
+}
+
+# Appends the QEMU arguments for an emulated TPM (chardev on the swtpm socket, tpmdev and
+# tpm-tis device) to the array ref $devices. Does nothing if $conf has no 'tpmstate0' entry.
+sub add_tpm_device {
+    my ($vmid, $devices, $conf) = @_;
+
+    return if !$conf->{tpmstate0};
+
+    my $socket_path = get_tpm_paths($vmid)->{socket};
+
+    push $devices->@*,
+        '-chardev', "socket,id=tpmchar,path=$socket_path",
+        '-tpmdev', "emulator,id=tpmdev,chardev=tpmchar",
+        '-device', "tpm-tis,tpmdev=tpmdev";
+}
+
+# Starts the swtpm emulator daemon for the given 'tpmstate0' drive.
+#
+# $storecfg  - storage configuration, used to map storage-backed state volumes
+# $vmid      - VM ID, determines the socket, PID file and log file names
+# $tpmdrive  - raw 'tpmstate0' config value; nothing is started if it is empty
+# $migration - if true, swtpm_setup is skipped
+#
+# Returns the PID read from the swtpm PID file, or undef (with a warning) if no PID could be
+# parsed from it. Dies if the PID file is not created in time.
+sub start_swtpm {
+    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;
+
+    return if !$tpmdrive;
+
+    my $state;
+    my $tpm = parse_drive("tpmstate0", $tpmdrive);
+    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+    if ($storeid) {
+        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
+    } else {
+        $state = $tpm->{file};
+    }
+
+    my $paths = get_tpm_paths($vmid);
+
+    # during migration, we will get state from remote
+    if (!$migration) {
+        # run swtpm_setup to create a new TPM state if it doesn't exist yet
+        my $setup_cmd = [
+            "swtpm_setup",
+            "--tpmstate",
+            "file://$state",
+            "--createek",
+            "--create-ek-cert",
+            "--create-platform-cert",
+            "--lock-nvram",
+            "--config",
+            "/etc/swtpm_setup.conf", # do not use XDG configs
+            "--runas",
+            "0", # force creation as root, error if not possible
+            "--not-overwrite", # ignore existing state, do not modify
+        ];
+
+        push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
+        # TPM 2.0 supports ECC crypto, use if possible
+        push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';
+
+        # the output line is passed as argument; $1 would only be a stale regex capture here
+        run_command($setup_cmd, outfunc => sub {
+            my ($line) = @_;
+            print "swtpm_setup: $line\n";
+        });
+    }
+
+    # Used to distinguish different invocations in the log.
+    my $log_prefix = "[id=" . int(time()) . "] ";
+
+    my $emulator_cmd = [
+        "swtpm",
+        "socket",
+        "--tpmstate",
+        "backend-uri=file://$state,mode=0600",
+        "--ctrl",
+        "type=unixio,path=$paths->{socket},mode=0600",
+        "--pid",
+        "file=$paths->{pid}",
+        "--terminate", # terminate on QEMU disconnect
+        "--daemon",
+        "--log",
+        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
+    ];
+    push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
+    # NOTE(review): assumes run_command hands over lines without trailing newline - confirm
+    run_command($emulator_cmd, outfunc => sub {
+        my ($line) = @_;
+        print "$line\n";
+    });
+
+    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
+    while (! -e $paths->{pid}) {
+        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
+        usleep(50_000);
+    }
+
+    # return untainted PID of swtpm daemon so it can be killed on error; capture in list context
+    # so that a failed match yields undef instead of a stale $1 from an earlier regex
+    my $pid_line = file_read_firstline($paths->{pid}) // '';
+    my ($pid) = $pid_line =~ m/(\d+)/;
+    warn "could not parse swtpm PID from '$paths->{pid}'\n" if !defined($pid);
+    return $pid;
+}
+
sub vga_conf_has_spice {
my ($vga) = @_;
return $1 || 1;
}
-sub is_native($) {
- my ($arch) = @_;
- return get_host_arch() eq $arch;
-}
-
sub get_vm_arch {
my ($conf) = @_;
return $conf->{arch} // get_host_arch();
aarch64 => 'virt',
};
+# Returns the leading "major.minor" part of $kvmversion, or undef if it does not start with one.
+# If $kvmversion is undefined, the result of kvm_user_version() is used instead.
+sub get_installed_machine_version {
+    my ($kvmversion) = @_;
+    $kvmversion = kvm_user_version() if !defined($kvmversion);
+    # capture in list context: reading $1 after a failed match would return a stale capture
+    # left over from an earlier, unrelated regex
+    my ($machine_version) = $kvmversion =~ m/^(\d+\.\d+)/;
+    return $machine_version;
+}
+
+# Turns an unversioned machine type ('pc', 'q35', 'virt' or empty) into a version-pinned one.
+#
+# $base_version is used as the pinned version if it is defined and
+# PVE::QemuServer::Machine::can_run_pve_machine_version() accepts it for $kvmversion; otherwise
+# the version from get_installed_machine_version($kvmversion) is used. Any other machine type is
+# returned unchanged after a warning.
+sub windows_get_pinned_machine_version {
+    my ($machine, $base_version, $kvmversion) = @_;
+
+    my $base_is_usable = defined($base_version)
+        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
+    my $pin_version = $base_is_usable
+        ? $base_version
+        : get_installed_machine_version($kvmversion);
+
+    # an empty machine type means the 'pc' default
+    my $prefix_of = {
+        pc => 'pc-i440fx',
+        q35 => 'pc-q35',
+        virt => 'virt',
+    };
+    my $prefix = $prefix_of->{ $machine || 'pc' };
+    return "$prefix-$pin_version" if defined($prefix);
+
+    warn "unknown machine type '$machine', not touching that!\n";
+    return $machine;
+}
+
+# Returns the machine type string for a VM config.
+#
+# $conf            - VM config hash ref ('machine' and 'ostype' are read)
+# $forcemachine    - if set, takes precedence over the configured machine type
+# $arch            - architecture used to pick a default machine, defaults to 'x86_64'
+# $add_pve_version - if true, make sure the result carries a '+pve<N>' suffix
+# $kvmversion      - QEMU version, defaults to kvm_user_version() for unversioned machine types
sub get_vm_machine {
my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;
- my $machine = $forcemachine || $conf->{machine};
+ my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
+ my $machine = $forcemachine || $machine_conf->{type};
+ # no machine type, or one without a version
if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
+ $kvmversion //= kvm_user_version();
+ # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
+ # layout which confuses windows quite a bit and may result in various regressions..
+ # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
+ if (windows_version($conf->{ostype})) {
+ $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
+ }
$arch //= 'x86_64';
$machine ||= $default_machines->{$arch};
+ # NOTE(review): the suffix is derived from $kvmversion even if the machine was pinned to
+ # another version above - confirm this is intended
if ($add_pve_version) {
- $kvmversion //= kvm_user_version();
my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
$machine .= "+pve$pvever";
}
}
- if ($add_pve_version && $machine !~ m/\+pve\d+$/) {
+ if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
+ # strip a trailing '.pxe' so that the '+pve0' suffix is inserted before it
+ my $is_pxe = $machine =~ m/^(.*?)\.pxe$/;
+ $machine = $1 if $is_pxe;
+
# for version-pinned machines that do not include a pve-version (e.g.
# pc-q35-4.1), we assume 0 to keep them stable in case we bump
$machine .= '+pve0';
+
+ $machine .= '.pxe' if $is_pxe;
}
return $machine;
}
+# Returns the (code image, vars image) paths of the OVMF firmware to use.
+#
+# $arch    - architecture, selects the image set from $OVMF; dies if none is known
+# $efidisk - parsed efidisk drive hash ref ('efitype' and 'pre-enrolled-keys' are read)
+# $smm     - on x86_64 with efitype '4m', selects '4m' (true) or '4m-no-smm' (false)
+#
+# Dies if one of the two selected image files does not exist.
-sub get_ovmf_files($) {
- my ($arch) = @_;
+sub get_ovmf_files($$$) {
+ my ($arch, $efidisk, $smm) = @_;
- my $ovmf = $OVMF->{$arch}
+ my $types = $OVMF->{$arch}
or die "no OVMF images known for architecture '$arch'\n";
- return @$ovmf;
+ my $type = 'default';
+ if ($arch eq 'x86_64') {
+ if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
+ $type = $smm ? "4m" : "4m-no-smm";
+ # '-ms' variant if the efidisk has 'pre-enrolled-keys' set
+ $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
+ } else {
+ # TODO: log_warn about use of legacy images for x86_64 with Proxmox VE 9
+ }
+ }
+
+ my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*;
+ die "EFI base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
+ die "EFI vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
+
+ return ($ovmf_code, $ovmf_vars);
}
my $Arch2Qemu = {
};
sub get_command_for_arch($) {
my ($arch) = @_;
- return '/usr/bin/kvm' if is_native($arch);
+ return '/usr/bin/kvm' if is_native_arch($arch);
my $cmd = $Arch2Qemu->{$arch}
or die "don't know how to emulate architecture '$arch'\n";
$qemu_cmd,
'-machine', $default_machine,
'-display', 'none',
- '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server,nowait",
+ '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
'-mon', 'chardev=qmp,mode=control',
'-pidfile', $pidfile,
'-S', '-daemonize'
};
my $err = $@;
- # force stop with 10 sec timeout and 'nocheck'
- # always stop, even if QMP failed
+ # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);
die $err if $err;
return \@flags;
}
+# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
+# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
+#
+# Returns a true value if the BIOS is SeaBIOS (explicitly or because none is configured) and the
+# VGA type is a serial port or 'none'. Returns nothing for 'virt' machine types.
+my sub should_disable_smm {
+    my ($conf, $vga, $machine) = @_;
+
+    # there is no smm flag that could be disabled
+    return if $machine =~ m/^virt/;
+
+    my $bios = $conf->{bios};
+    my $uses_seabios = !defined($bios) || $bios eq 'seabios';
+
+    return $uses_seabios
+        && $vga->{type}
+        && $vga->{type} =~ m/^(serial\d+|none)$/;
+}
+
+# Builds the two '-drive' option strings for OVMF: the read-only firmware code pflash (unit 0)
+# and the EFI vars pflash (unit 1). Returns them as a two-element list in that order.
+#
+# The vars drive is backed by the configured 'efidisk0'. Without one, a warning is logged and
+# the vars image is copied to a temporary file under /tmp, which is used instead.
+# $version_guard is a code ref called with (4, 1, 2) to decide whether 'size=' is added.
+my sub print_ovmf_drive_commandlines {
+    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;
+
+    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
+
+    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);
+
+    # options of the vars drive, joined with ',' at the end
+    my @var_opts = ('if=pflash', 'unit=1', 'id=drive-efidisk0');
+
+    if (!$efidisk) {
+        log_warn("no efidisk configured! Using temporary efivars disk.");
+        my $tmp_path = "/tmp/$vmid-ovmf.fd";
+        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
+        push @var_opts, "format=raw", "file=$tmp_path";
+        push @var_opts, "size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
+    } else {
+        my ($storeid, $volname) = PVE::Storage::parse_volume_id($efidisk->{file}, 1);
+        my $path = $efidisk->{file};
+        my $format = $efidisk->{format};
+
+        if ($storeid) {
+            $path = PVE::Storage::path($storecfg, $efidisk->{file});
+            if (!defined($format)) {
+                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+                $format = qemu_img_format($scfg, $volname);
+            }
+        } elsif (!defined($format)) {
+            die "efidisk format must be specified\n";
+        }
+
+        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
+        if ($path =~ m/^rbd:/) {
+            push @var_opts, 'cache=writeback';
+            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
+        }
+
+        push @var_opts, "format=$format", "file=$path";
+        push @var_opts, "size=" . (-s $ovmf_vars) if $format eq 'raw' && $version_guard->(4, 1, 2);
+        push @var_opts, 'readonly=on' if drive_is_read_only($conf, $efidisk);
+    }
+
+    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
+
+    return ($code_drive_str, join(',', @var_opts));
+}
+
sub config_to_command {
- my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu) = @_;
+ my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
+ $live_restore_backing) = @_;
- my $cmd = [];
- my $globalFlags = [];
- my $machineFlags = [];
- my $rtcFlags = [];
+ my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
my $devices = [];
- my $pciaddr = '';
my $bridges = {};
my $ostype = $conf->{ostype};
my $winversion = windows_version($ostype);
my $kvm = $conf->{kvm};
my $nodename = nodename();
+ my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
+
my $arch = get_vm_arch($conf);
my $kvm_binary = get_command_for_arch($arch);
my $kvmver = kvm_user_version($kvm_binary);
my $add_pve_version = min_version($kvmver, 4, 1);
my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
- my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type, $kvmver);
- $kvm //= 1 if is_native($arch);
+ my $machine_version = extract_version($machine_type, $kvmver);
+ $kvm //= 1 if is_native_arch($arch);
$machine_version =~ m/(\d+)\.(\d+)/;
my ($machine_major, $machine_minor) = ($1, $2);
if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
} elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
- die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type', please upgrade node '$nodename'\n"
+ die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
+ ." please upgrade node '$nodename'\n"
} elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
- die "Installed qemu-server (max feature level for $machine_major.$machine_minor is pve$max_pve_version)"
- ." is too old to run machine type '$machine_type', please upgrade node '$nodename'\n";
+ die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
+ ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
+ ." node '$nodename'\n";
}
# if a specific +pve version is required for a feature, use $version_guard
return 1;
};
- if ($kvm) {
- die "KVM virtualisation configured, but not available. Either disable in VM configuration or enable in BIOS.\n"
- if !defined kvm_version();
+ if ($kvm && !defined kvm_version()) {
+ die "KVM virtualisation configured, but not available. Either disable in VM configuration"
+ ." or enable in BIOS.\n";
}
my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
- my $cpuunits = defined($conf->{cpuunits}) ?
- $conf->{cpuunits} : $defaults->{cpuunits};
+ my $cmd = [];
+ if ($conf->{affinity}) {
+ push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
+ }
push @$cmd, $kvm_binary;
my $vmname = $conf->{name} || "vm$vmid";
- push @$cmd, '-name', $vmname;
+ push @$cmd, '-name', "$vmname,debug-threads=on";
+
+ push @$cmd, '-no-shutdown';
my $use_virtio = 0;
my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
- push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server,nowait";
+ push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
push @$cmd, '-mon', "chardev=qmp,mode=control";
if (min_version($machine_version, 2, 12)) {
}
if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
- my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch);
- die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
-
- my ($path, $format);
- if (my $efidisk = $conf->{efidisk0}) {
- my $d = parse_drive('efidisk0', $efidisk);
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
- $format = $d->{format};
- if ($storeid) {
- $path = PVE::Storage::path($storecfg, $d->{file});
- if (!defined($format)) {
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
- }
- } else {
- $path = $d->{file};
- die "efidisk format must be specified\n"
- if !defined($format);
- }
- } else {
- warn "no efidisk configured! Using temporary efivars disk.\n";
- $path = "/tmp/$vmid-ovmf.fd";
- PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
- $format = 'raw';
- }
-
- my $size_str = "";
+ die "OVMF (UEFI) BIOS is not supported on 32-bit CPU types\n"
+ if !$forcecpu && get_cpu_bitness($conf->{cpu}, $arch) == 32;
- if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
- $size_str = ",size=" . (-s $ovmf_vars);
- }
-
- push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly,file=$ovmf_code";
- push @$cmd, '-drive', "if=pflash,unit=1,format=$format,id=drive-efidisk0$size_str,file=$path";
+ my ($code_drive_str, $var_drive_str) =
+ print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
+ push $cmd->@*, '-drive', $code_drive_str;
+ push $cmd->@*, '-drive', $var_drive_str;
}
- # load q35 config
- if ($q35) {
+ if ($q35) { # tell QEMU to load q35 config early
# we use different pcie-port hardware for qemu >= 4.0 for passthrough
if (min_version($machine_version, 4, 0)) {
push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
}
}
+ if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
+ push @$cmd, $fixups->@*;
+ }
+
if ($conf->{vmgenid}) {
push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
}
# add usb controllers
- my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers($conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
+ my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
+ $conf, $bridges, $arch, $machine_type, $machine_version);
push @$devices, @usbcontrollers if @usbcontrollers;
my $vga = parse_vga($conf->{vga});
}
# enable absolute mouse coordinates (needed by vnc)
- my $tablet;
- if (defined($conf->{tablet})) {
- $tablet = $conf->{tablet};
- } else {
+ my $tablet = $conf->{tablet};
+ if (!defined($tablet)) {
$tablet = $defaults->{tablet};
$tablet = 0 if $qxlnum; # disable for spice because it is not needed
$tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
push @$devices, '-device', $kbd if defined($kbd);
}
+ my $bootorder = device_bootorder($conf);
+
# host pci device passthrough
- my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
- $conf, $devices, $winversion, $q35, $bridges, $arch, $machine_type);
+ my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
+ $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
# usb devices
my $usb_dev_features = {};
$usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
- my @usbdevices = PVE::QemuServer::USB::get_usb_devices($conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features);
+ my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
+ $conf, $usb_dev_features, $bootorder, $machine_version);
push @$devices, @usbdevices if @usbdevices;
+
# serial devices
for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
- if (my $path = $conf->{"serial$i"}) {
- if ($path eq 'socket') {
- my $socket = "/var/run/qemu-server/${vmid}.serial$i";
- push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server,nowait";
- # On aarch64, serial0 is the UART device. Qemu only allows
- # connecting UART devices via the '-serial' command line, as
- # the device has a fixed slot on the hardware...
- if ($arch eq 'aarch64' && $i == 0) {
- push @$devices, '-serial', "chardev:serial$i";
- } else {
- push @$devices, '-device', "isa-serial,chardev=serial$i";
- }
+ my $path = $conf->{"serial$i"} or next;
+ if ($path eq 'socket') {
+ my $socket = "/var/run/qemu-server/${vmid}.serial$i";
+ push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
+ # On aarch64, serial0 is the UART device. QEMU only allows
+ # connecting UART devices via the '-serial' command line, as
+ # the device has a fixed slot on the hardware...
+ if ($arch eq 'aarch64' && $i == 0) {
+ push @$devices, '-serial', "chardev:serial$i";
} else {
- die "no such serial device\n" if ! -c $path;
- push @$devices, '-chardev', "tty,id=serial$i,path=$path";
push @$devices, '-device', "isa-serial,chardev=serial$i";
}
+ } else {
+ die "no such serial device\n" if ! -c $path;
+ push @$devices, '-chardev', "serial,id=serial$i,path=$path";
+ push @$devices, '-device', "isa-serial,chardev=serial$i";
}
}
for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
if (my $path = $conf->{"parallel$i"}) {
die "no such parallel device\n" if ! -c $path;
- my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
+ my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
push @$devices, '-device', "isa-parallel,chardev=parallel$i";
}
push @$devices, @$audio_devs;
}
+ # Add a TPM only if the VM is not a template,
+ # to support backing up template VMs even if the TPM disk is write-protected.
+ add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
+
my $sockets = 1;
$sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
$sockets = $conf->{sockets} if $conf->{sockets};
my $allowed_vcpus = $cpuinfo->{cpus};
- die "MAX $allowed_vcpus vcpus allowed per VM on this node\n"
- if ($allowed_vcpus < $maxcpus);
-
- if($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
+ die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
+ if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
for (my $i = 2; $i <= $vcpus; $i++) {
- my $cpustr = print_cpu_device($conf,$i);
+ my $cpustr = print_cpu_device($conf, $arch, $i);
push @$cmd, '-device', $cpustr;
}
}
push @$cmd, '-nodefaults';
- my $bootorder = $conf->{boot} || $confdesc->{boot}->{default};
-
- my $bootindex_hash = {};
- my $i = 1;
- foreach my $o (split(//, $bootorder)) {
- $bootindex_hash->{$o} = $i*100;
- $i++;
- }
-
push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
- push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
+ push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
- push @$devices, '-device', print_vga_device($conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
+ push @$devices, '-device', print_vga_device(
+ $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
+
+ push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
+
my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
- push @$cmd, '-vnc', "unix:$socket,password";
+ push @$cmd, '-vnc', "unix:$socket,password=on";
} else {
push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
push @$cmd, '-nographic';
if ($winversion >= 6) {
push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
- push @$cmd, '-no-hpet';
+ push @$machineFlags, 'hpet=off';
}
push @$rtcFlags, 'driftfix=slew' if $tdf;
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config(
+ $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
push @$cmd, '-S' if $conf->{freeze};
if ($guest_agent->{enabled}) {
my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
- push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0";
+ push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
}
}
- my $rng = parse_rng($conf->{rng0}) if $conf->{rng0};
- if ($rng && &$version_guard(4, 1, 2)) {
+ my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
+ if ($rng && $version_guard->(4, 1, 2)) {
check_rng_source($rng->{source});
my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
my $spice_port;
- if ($qxlnum) {
+ assert_clipboard_config($vga);
+ my $is_spice = $qxlnum || $vga->{type} =~ /^virtio/;
+
+ if ($is_spice || ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc')) {
if ($qxlnum > 1) {
if ($winversion){
for (my $i = 1; $i < $qxlnum; $i++){
- push @$devices, '-device', print_vga_device($conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
+ push @$devices, '-device', print_vga_device(
+ $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
}
} else {
# assume other OS works like Linux
my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
- my $pfamily = PVE::Tools::get_host_address_family($nodename);
- my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
- die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
-
push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
- push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
+ if ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc') {
+ push @$devices, '-chardev', 'qemu-vdagent,id=vdagent,name=vdagent,clipboard=on';
+ } else {
+ push @$devices, '-chardev', 'spicevmc,id=vdagent,name=vdagent';
+ }
push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
- my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
- $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
+ if ($is_spice) {
+ my $pfamily = PVE::Tools::get_host_address_family($nodename);
+ my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
+ die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
- my $spice_enhancement = PVE::JSONSchema::parse_property_string($spice_enhancements_fmt, $conf->{spice_enhancements} // '');
- if ($spice_enhancement->{foldersharing}) {
- push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
- push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
- }
+ my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
+ $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
- my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
- $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}" if $spice_enhancement->{videostreaming};
- push @$devices, '-spice', "$spice_opts";
+ my $spice_enhancement_str = $conf->{spice_enhancements} // '';
+ my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
+ if ($spice_enhancement->{foldersharing}) {
+ push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
+ push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
+ }
+
+ my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
+ $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
+ if $spice_enhancement->{videostreaming};
+ push @$devices, '-spice', "$spice_opts";
+ }
}
# enable balloon by default, unless explicitly disabled
if (!defined($conf->{balloon}) || $conf->{balloon}) {
- $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
- push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
+ my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
+ my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
+ $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
+ push @$devices, '-device', $ballooncmd;
}
if ($conf->{watchdog}) {
my $wdopts = parse_watchdog($conf->{watchdog});
- $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
+ my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
my $watchdog = $wdopts->{model} || 'i6300esb';
push @$devices, '-device', "$watchdog$pciaddr";
push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
my ($ds, $drive) = @_;
if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
+ check_volume_storage_type($storecfg, $drive->{file});
push @$vollist, $drive->{file};
}
# ignore efidisk here, already added in bios/fw handling code above
return if $drive->{interface} eq 'efidisk';
+ # similar for TPM
+ return if $drive->{interface} eq 'tpmstate';
$use_virtio = 1 if $ds =~ m/^virtio/;
- if (drive_is_cdrom ($drive)) {
- if ($bootindex_hash->{d}) {
- $drive->{bootindex} = $bootindex_hash->{d};
- $bootindex_hash->{d} += 1;
- }
- } else {
- if ($bootindex_hash->{c}) {
- $drive->{bootindex} = $bootindex_hash->{c} if $conf->{bootdisk} && ($conf->{bootdisk} eq $ds);
- $bootindex_hash->{c} += 1;
- }
- }
+ $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
if ($drive->{interface} eq 'virtio'){
push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
- $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
+ my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
my $iothread = '';
$iothread .= ",iothread=iothread-$controller_prefix$controller";
push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
} elsif ($drive->{iothread}) {
- warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
+ log_warn(
+ "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
+ );
}
my $queues = '';
$queues = ",num_queues=$drive->{queues}";
}
- push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues" if !$scsicontroller->{$controller};
+ push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
+ if !$scsicontroller->{$controller};
$scsicontroller->{$controller}=1;
}
if ($drive->{interface} eq 'sata') {
my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
- $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
- push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr" if !$ahcicontroller->{$controller};
+ my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
+ push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
+ if !$ahcicontroller->{$controller};
$ahcicontroller->{$controller}=1;
}
- my $drive_cmd = print_drive_commandline_full($storecfg, $vmid, $drive);
+ my $live_restore = $live_restore_backing->{$ds};
+ my $live_blockdev_name = undef;
+ if ($live_restore) {
+ $live_blockdev_name = $live_restore->{name};
+ push @$devices, '-blockdev', $live_restore->{blockdev};
+ }
+
+ my $drive_cmd = print_drive_commandline_full(
+ $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0));
+
+ # extra protection for templates, but SATA and IDE don't support it..
+ $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
+
push @$devices, '-drive',$drive_cmd;
- push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
+ push @$devices, '-device', print_drivedevice_full(
+ $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
});
for (my $i = 0; $i < $MAX_NETS; $i++) {
- next if !$conf->{"net$i"};
- my $d = parse_net($conf->{"net$i"});
- next if !$d;
+ my $netname = "net$i";
+
+ next if !$conf->{$netname};
+ my $d = parse_net($conf->{$netname});
+ next if !$d;
+ # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
- $use_virtio = 1 if $d->{model} eq 'virtio';
+ $use_virtio = 1 if $d->{model} eq 'virtio';
- if ($bootindex_hash->{n}) {
- $d->{bootindex} = $bootindex_hash->{n};
- $bootindex_hash->{n} += 1;
- }
+ $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
- my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, "net$i");
- push @$devices, '-netdev', $netdevfull;
+ my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
+ push @$devices, '-netdev', $netdevfull;
- my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files, $arch, $machine_type);
- push @$devices, '-device', $netdevicefull;
+ my $netdevicefull = print_netdevice_full(
+ $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
+
+ push @$devices, '-device', $netdevicefull;
}
if ($conf->{ivshmem}) {
- my $ivshmem = PVE::JSONSchema::parse_property_string($ivshmem_fmt, $conf->{ivshmem});
+ my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
my $bus;
if ($q35) {
my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
- push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path,size=$ivshmem->{size}M";
+ push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
+ .",size=$ivshmem->{size}M";
}
# pci.4 is nested in pci.1
$bridges->{1} = 1 if $bridges->{4};
- if (!$q35) {
- # add pci bridges
- if (min_version($machine_version, 2, 3)) {
+ if (!$q35) { # add pci bridges
+ if (min_version($machine_version, 2, 3)) {
$bridges->{1} = 1;
$bridges->{2} = 1;
}
-
$bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
-
}
for my $k (sort {$b cmp $a} keys %$bridges) {
if ($k == 2 && $legacy_igd) {
$k_name = "$k-igd";
}
- $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
-
+ my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
- if ($q35) {
- # add after -readconfig pve-q35.cfg
+
+ if ($q35) { # add after -readconfig pve-q35.cfg
splice @$devices, 2, 0, '-device', $devstr;
} else {
unshift @$devices, '-device', $devstr if $k > 0;
push @$machineFlags, 'accel=tcg';
}
+ push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
+
my $machine_type_min = $machine_type;
if ($add_pve_version) {
$machine_type_min =~ s/\+pve\d+$//;
}
push @$machineFlags, "type=${machine_type_min}";
+ PVE::QemuServer::Machine::assert_valid_machine_property($conf, $machine_conf);
+
+ if (my $viommu = $machine_conf->{viommu}) {
+ if ($viommu eq 'intel') {
+ unshift @$devices, '-device', 'intel-iommu,intremap=on,caching-mode=on';
+ push @$machineFlags, 'kernel-irqchip=split';
+ } elsif ($viommu eq 'virtio') {
+ push @$devices, '-device', 'virtio-iommu-pci';
+ }
+ }
+
push @$cmd, @$devices;
push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
print "activating and using '$vmstate' as vmstate\n";
}
+ if (PVE::QemuConfig->is_template($conf)) {
+ # needed to workaround base volumes being read-only
+ push @$cmd, '-snapshot';
+ }
+
# add custom args
if ($conf->{args}) {
my $aa = PVE::Tools::split_args($conf->{args});
push @$cmd, @$aa;
}
- return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
+ return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
}
sub check_rng_source {
my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
if ($source eq '/dev/hwrng' && file_read_firstline($rng_current) eq 'none') {
- # Needs to abort, otherwise QEMU crashes on first rng access.
- # Note that rng_current cannot be changed to 'none' manually, so
- # once the VM is past this point, it is no longer an issue.
- die "Cannot start VM with passed-through RNG device: '/dev/hwrng'"
- . " exists, but '$rng_current' is set to 'none'. Ensure that"
- . " a compatible hardware-RNG is attached to the host.\n";
+ # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
+ # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
+ die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
+ ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
+ ." to the host.\n";
}
}
my $to_check = [];
for my $d (@$devices_to_check) {
$devices->{$d->{'qdev_id'}} = 1 if $d->{'qdev_id'};
- next if !$d->{'pci_bridge'};
+ next if !$d->{'pci_bridge'} || !$d->{'pci_bridge'}->{devices};
$devices->{$d->{'qdev_id'}} += scalar(@{$d->{'pci_bridge'}->{devices}});
push @$to_check, @{$d->{'pci_bridge'}->{devices}};
# qom-list path=/machine/peripheral
my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
foreach my $per (@$resperipheral) {
- if ($per->{name} =~ m/^usb\d+$/) {
+ if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
$devices->{$per->{name}} = 1;
}
}
my $devices_list = vm_devices_list($vmid);
return 1 if defined($devices_list->{$deviceid});
- qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type); # add PCI bridge if we need it for the device
+ # add PCI bridge if we need it for the device
+ qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);
if ($deviceid eq 'tablet') {
-
qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
-
} elsif ($deviceid eq 'keyboard') {
-
qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
-
+ } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
+ my $id = $1;
+ qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
+ qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
} elsif ($deviceid =~ m/^usb(\d+)$/) {
-
- die "usb hotplug currently not reliable\n";
- # since we can't reliably hot unplug all added usb devices
- # and usb passthrough disables live migration
- # we disable usb hotplugging for now
- qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
-
+ qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
} elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
-
qemu_iothread_add($vmid, $deviceid, $device);
- qemu_driveadd($storecfg, $vmid, $device);
- my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, $arch, $machine_type);
+ qemu_driveadd($storecfg, $vmid, $device);
+ my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
- qemu_deviceadd($vmid, $devicefull);
+ qemu_deviceadd($vmid, $devicefull);
eval { qemu_deviceaddverify($vmid, $deviceid); };
if (my $err = $@) {
eval { qemu_drivedel($vmid, $deviceid); };
warn $@ if $@;
die $err;
}
-
} elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
-
-
- my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
- my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
+ my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
+ my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;
- my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";
+ my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";
if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
qemu_iothread_add($vmid, $deviceid, $device);
$devicefull .= ",num_queues=$device->{queues}";
}
- qemu_deviceadd($vmid, $devicefull);
- qemu_deviceaddverify($vmid, $deviceid);
-
+ qemu_deviceadd($vmid, $devicefull);
+ qemu_deviceaddverify($vmid, $deviceid);
} elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
-
qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type);
qemu_driveadd($storecfg, $vmid, $device);
- my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, $arch, $machine_type);
+ my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
eval { qemu_deviceadd($vmid, $devicefull); };
if (my $err = $@) {
eval { qemu_drivedel($vmid, $deviceid); };
warn $@ if $@;
die $err;
}
-
} elsif ($deviceid =~ m/^(net)(\d+)$/) {
-
- return undef if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);
+ return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);
my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
+ my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type);
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
- my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);
+ my $netdevicefull = print_netdevice_full(
+ $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type, $machine_version);
qemu_deviceadd($vmid, $netdevicefull);
eval {
qemu_deviceaddverify($vmid, $deviceid);
warn $@ if $@;
die $err;
}
-
} elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
-
my $bridgeid = $2;
my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";
qemu_deviceadd($vmid, $devicefull);
qemu_deviceaddverify($vmid, $deviceid);
-
} else {
die "can't hotplug device '$deviceid'\n";
}
my $devices_list = vm_devices_list($vmid);
return 1 if !defined($devices_list->{$deviceid});
- die "can't unplug bootdisk" if $conf->{bootdisk} && $conf->{bootdisk} eq $deviceid;
-
- if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
+ my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
+ die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;
+ if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
qemu_devicedel($vmid, $deviceid);
-
+ } elsif ($deviceid =~ m/^usbredirdev\d+$/) {
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
} elsif ($deviceid =~ m/^usb\d+$/) {
-
- die "usb hotplug currently not reliable\n";
- # when unplugging usb devices this way,
- # there may be remaining usb controllers/hubs
- # so we disable it for now
qemu_devicedel($vmid, $deviceid);
qemu_devicedelverify($vmid, $deviceid);
-
} elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
+ my $device = parse_drive($deviceid, $conf->{$deviceid});
- qemu_devicedel($vmid, $deviceid);
- qemu_devicedelverify($vmid, $deviceid);
- qemu_drivedel($vmid, $deviceid);
- qemu_iothread_del($conf, $vmid, $deviceid);
-
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_drivedel($vmid, $deviceid);
+ qemu_iothread_del($vmid, $deviceid, $device);
} elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
-
qemu_devicedel($vmid, $deviceid);
qemu_devicedelverify($vmid, $deviceid);
- qemu_iothread_del($conf, $vmid, $deviceid);
-
} elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
+ my $device = parse_drive($deviceid, $conf->{$deviceid});
- qemu_devicedel($vmid, $deviceid);
- qemu_drivedel($vmid, $deviceid);
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_drivedel($vmid, $deviceid);
qemu_deletescsihw($conf, $vmid, $deviceid);
+ qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device)
+ if $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
} elsif ($deviceid =~ m/^(net)(\d+)$/) {
-
- qemu_devicedel($vmid, $deviceid);
- qemu_devicedelverify($vmid, $deviceid);
- qemu_netdevdel($vmid, $deviceid);
-
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_netdevdel($vmid, $deviceid);
} else {
die "can't unplug device '$deviceid'\n";
}
return 1;
}
-sub qemu_deviceadd {
- my ($vmid, $devicefull) = @_;
-
- $devicefull = "driver=".$devicefull;
- my %options = split(/[=,]/, $devicefull);
-
- mon_cmd($vmid, "device_add" , %options);
-}
-
-sub qemu_devicedel {
- my ($vmid, $deviceid) = @_;
+sub qemu_spice_usbredir_chardev_add {
+ my ($vmid, $id) = @_;
- my $ret = mon_cmd($vmid, "device_del", id => $deviceid);
+ mon_cmd($vmid, "chardev-add" , (
+ id => $id,
+ backend => {
+ type => 'spicevmc',
+ data => {
+ type => "usbredir",
+ },
+ },
+ ));
}
sub qemu_iothread_add {
- my($vmid, $deviceid, $device) = @_;
+ my ($vmid, $deviceid, $device) = @_;
if ($device->{iothread}) {
my $iothreads = vm_iothreads_list($vmid);
}
sub qemu_iothread_del {
- my($conf, $vmid, $deviceid) = @_;
+ my ($vmid, $deviceid, $device) = @_;
- my $confid = $deviceid;
- if ($deviceid =~ m/^(?:virtioscsi|scsihw)(\d+)$/) {
- $confid = 'scsi' . $1;
- }
- my $device = parse_drive($confid, $conf->{$confid});
if ($device->{iothread}) {
my $iothreads = vm_iothreads_list($vmid);
qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"};
}
}
-sub qemu_objectadd {
- my($vmid, $objectid, $qomtype) = @_;
-
- mon_cmd($vmid, "object-add", id => $objectid, "qom-type" => $qomtype);
-
- return 1;
-}
-
-sub qemu_objectdel {
- my($vmid, $objectid) = @_;
-
- mon_cmd($vmid, "object-del", id => $objectid);
-
- return 1;
-}
-
sub qemu_driveadd {
my ($storecfg, $vmid, $device) = @_;
- my $drive = print_drive_commandline_full($storecfg, $vmid, $device);
+ my $kvmver = get_running_qemu_version($vmid);
+ my $io_uring = min_version($kvmver, 6, 0);
+ my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
$drive =~ s/\\/\\\\/g;
my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");
}
sub qemu_drivedel {
- my($vmid, $deviceid) = @_;
+ my ($vmid, $deviceid) = @_;
my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
$ret =~ s/^\s+//;
my $scsihwid="$controller_prefix$controller";
my $devices_list = vm_devices_list($vmid);
- if(!defined($devices_list->{$scsihwid})) {
+ if (!defined($devices_list->{$scsihwid})) {
vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);
}
foreach my $opt (keys %{$devices_list}) {
if (is_valid_drivename($opt)) {
my $drive = parse_drive($opt, $conf->{$opt});
- if($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
+ if ($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
return 1;
}
}
vm_deviceunplug($vmid, $conf, $deviceid);
# check if xhci controller is necessary and available
- if ($device->{usb3}) {
-
- my $devicelist = vm_devices_list($vmid);
+ my $devicelist = vm_devices_list($vmid);
- if (!$devicelist->{xhci}) {
- my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
- qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
- }
+ if (!$devicelist->{xhci}) {
+ my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
+ qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
}
- my $d = parse_usb_device($device->{host});
- $d->{usb3} = $device->{usb3};
# add the new one
- vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $d, $arch, $machine_type);
+ vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
sub qemu_cpu_hotplug {
if scalar(@{$currentrunningvcpus}) != $currentvcpus;
if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
+ my $arch = get_vm_arch($conf);
for (my $i = $currentvcpus+1; $i <= $vcpus; $i++) {
- my $cpustr = print_cpu_device($conf, $i);
+ my $cpustr = print_cpu_device($conf, $arch, $i);
qemu_deviceadd($vmid, $cpustr);
my $retry = 0;
}
-# old code, only used to shutdown old VM after update
-sub __read_avail {
- my ($fh, $timeout) = @_;
-
- my $sel = new IO::Select;
- $sel->add($fh);
-
- my $res = '';
- my $buf;
-
- my @ready;
- while (scalar (@ready = $sel->can_read($timeout))) {
- my $count;
- if ($count = $fh->sysread($buf, 8192)) {
- if ($buf =~ /^(.*)\(qemu\) $/s) {
- $res .= $1;
- last;
- } else {
- $res .= $buf;
- }
- } else {
- if (!defined($count)) {
- die "$!\n";
- }
- last;
- }
- }
-
- die "monitor read timeout\n" if !scalar(@ready);
-
- return $res;
-}
-
sub qemu_block_resize {
my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;
my $running = check_running($vmid);
- $size = 0 if !PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
+ PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
return if !$running;
my $padding = (1024 - $size % 1024) % 1024;
$size = $size + $padding;
- mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size));
-
+ mon_cmd(
+ $vmid,
+ "block_resize",
+ device => $deviceid,
+ size => int($size),
+ timeout => 60,
+ );
}
sub qemu_volume_snapshot {
my $running = check_running($vmid);
- if ($running && do_snapshots_with_qemu($storecfg, $volid)){
+ if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
} else {
PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
sub qemu_volume_snapshot_delete {
- my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;
+ my ($vmid, $storecfg, $volid, $snap) = @_;
my $running = check_running($vmid);
+ my $attached_deviceid;
- if($running) {
-
- $running = undef;
+ if ($running) {
my $conf = PVE::QemuConfig->load_config($vmid);
PVE::QemuConfig->foreach_volume($conf, sub {
my ($ds, $drive) = @_;
- $running = 1 if $drive->{file} eq $volid;
+ $attached_deviceid = "drive-$ds" if $drive->{file} eq $volid;
});
}
- if ($running && do_snapshots_with_qemu($storecfg, $volid)){
- mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
+ if ($attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid)) {
+ mon_cmd(
+ $vmid,
+ 'blockdev-snapshot-delete-internal-sync',
+ device => $attached_deviceid,
+ name => $snap,
+ );
} else {
- PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
+ PVE::Storage::volume_snapshot_delete(
+ $storecfg, $volid, $snap, $attached_deviceid ? 1 : undef);
}
}
sub set_migration_caps {
- my ($vmid) = @_;
+ my ($vmid, $savevm) = @_;
+
+ my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
+
+ my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
+ my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;
my $cap_ref = [];
"xbzrle" => 1,
"x-rdma-pin-all" => 0,
"zero-blocks" => 0,
- "compress" => 0
+ "compress" => 0,
+ "dirty-bitmaps" => $dirty_bitmaps,
};
my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");
my $volhash = {};
my $test_volid = sub {
- my ($key, $drive, $snapname) = @_;
+ my ($key, $drive, $snapname, $pending) = @_;
my $volid = $drive->{file};
return if !$volid;
$volhash->{$volid}->{shared} //= 0;
$volhash->{$volid}->{shared} = 1 if $drive->{shared};
- $volhash->{$volid}->{referenced_in_config} //= 0;
- $volhash->{$volid}->{referenced_in_config} = 1 if !defined($snapname);
+ $volhash->{$volid}->{is_unused} //= 0;
+ $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
+
+ $volhash->{$volid}->{is_attached} //= 0;
+ $volhash->{$volid}->{is_attached} = 1
+ if !$volhash->{$volid}->{is_unused} && !defined($snapname) && !$pending;
$volhash->{$volid}->{referenced_in_snapshot}->{$snapname} = 1
if defined($snapname);
+ $volhash->{$volid}->{referenced_in_pending} = 1 if $pending;
+
my $size = $drive->{size};
$volhash->{$volid}->{size} //= $size if $size;
$volhash->{$volid}->{is_vmstate} //= 0;
$volhash->{$volid}->{is_vmstate} = 1 if $key eq 'vmstate';
- $volhash->{$volid}->{is_unused} //= 0;
- $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
+ $volhash->{$volid}->{is_tpmstate} //= 0;
+ $volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0';
+
+ $volhash->{$volid}->{drivename} = $key if is_valid_drivename($key);
};
my $include_opts = {
};
PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid);
+
+ PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $test_volid, undef, 1)
+ if defined($conf->{pending}) && $conf->{pending}->%*;
+
foreach my $snapname (keys %{$conf->{snapshots}}) {
my $snap = $conf->{snapshots}->{$snapname};
PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname);
}
my $fast_plug_option = {
+ 'description' => 1,
+ 'hookscript' => 1,
'lock' => 1,
+ 'migrate_downtime' => 1,
+ 'migrate_speed' => 1,
'name' => 1,
'onboot' => 1,
+ 'protection' => 1,
'shares' => 1,
'startup' => 1,
- 'description' => 1,
- 'protection' => 1,
- 'vmstatestorage' => 1,
- 'hookscript' => 1,
'tags' => 1,
+ 'vmstatestorage' => 1,
+};
+
+for my $opt (keys %$confdesc_cloudinit) {
+ $fast_plug_option->{$opt} = 1;
};
# hotplug changes in [PENDING]
$errors->{$opt} = "hotplug problem - $msg";
};
+ my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
+
+    # Book-keeping for options listed in $cloudinit_pending_properties: remember in
+    # $conf->{cloudinit} how such an option differs from the recorded cloud-init state.
+    #  - {cloudinit}->{$opt} holds the value the option had before it went out of sync
+    #  - {cloudinit}->{added} is a sorted, comma separated list of options that were added
+    # $old is the value currently in the config, $new the value about to replace it
+    # (undef meaning "does not exist" / "is being deleted" respectively).
+    my $cloudinit_record_changed = sub {
+        my ($conf, $opt, $old, $new) = @_;
+        return if !$cloudinit_pending_properties->{$opt};
+
+        my $state = ($conf->{cloudinit} //= {});
+
+        my $known = $state->{$opt};
+        # the 'added' list is taken out here and written back (normalized) at the end
+        my %is_added = map { $_ => 1 } PVE::Tools::split_list(delete($state->{added}) // '');
+
+        if (defined($known)) {
+            # The option is already out of sync. Intermediate changes, a removal or a
+            # re-add with some other value keep the record as is; only setting it back
+            # to the recorded value brings it in sync again.
+            if (defined($new) && $new eq $known) {
+                delete $state->{$opt};
+                delete $is_added{$opt};
+            }
+        } elsif ($is_added{$opt}) {
+            # The option is marked as added. Changing it keeps the mark, deleting an
+            # existing value removes the mark again.
+            delete $is_added{$opt} if !defined($new) && defined($old);
+        } elsif (defined($old)) {
+            # First change or deletion of a so far untracked value: record what it was.
+            $state->{$opt} = $old;
+        } elsif (defined($new)) {
+            # A so far non-existent option gets set: mark it as added.
+            $is_added{$opt} = 1;
+        }
+        # remaining case (neither old nor new value): removing a non-existent value, ignore
+
+        my $added_list = join(',', sort keys %is_added);
+        $state->{added} = $added_list if length($added_list);
+    };
+
my $changes = 0;
foreach my $opt (keys %{$conf->{pending}}) { # add/change
if ($fast_plug_option->{$opt}) {
- $conf->{$opt} = $conf->{pending}->{$opt};
- delete $conf->{pending}->{$opt};
+ my $new = delete $conf->{pending}->{$opt};
+ $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
+ $conf->{$opt} = $new;
$changes = 1;
}
}
PVE::QemuConfig->write_config($vmid, $conf);
}
+ my $ostype = $conf->{ostype};
+ my $version = extract_version($machine_type, get_running_qemu_version($vmid));
my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+ my $usb_hotplug = $hotplug_features->{usb}
+ && min_version($version, 7, 1)
+ && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
+ my $cgroup = PVE::QemuServer::CGroup->new($vmid);
my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
+
foreach my $opt (sort keys %$pending_delete_hash) {
next if $selection && !$selection->{$opt};
my $force = $pending_delete_hash->{$opt}->{force};
eval {
if ($opt eq 'hotplug') {
- die "skip\n" if ($conf->{hotplug} =~ /memory/);
+ die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/);
} elsif ($opt eq 'tablet') {
die "skip\n" if !$hotplug_features->{usb};
if ($defaults->{tablet}) {
vm_deviceunplug($vmid, $conf, 'tablet');
vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
}
- } elsif ($opt =~ m/^usb\d+/) {
- die "skip\n";
- # since we cannot reliably hot unplug usb devices
- # we are disabling it
- die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
+ } elsif ($opt =~ m/^usb(\d+)$/) {
+ my $index = $1;
+ die "skip\n" if !$usb_hotplug;
+ vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
vm_deviceunplug($vmid, $conf, $opt);
} elsif ($opt eq 'vcpus') {
die "skip\n" if !$hotplug_features->{cpu};
# enable balloon device is not hotpluggable
die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
# here we reset the ballooning value to memory
- my $balloon = $conf->{memory} || $defaults->{memory};
+ my $balloon = get_current_memory($conf->{memory});
mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
} elsif ($fast_plug_option->{$opt}) {
# do nothing
} elsif ($opt =~ m/^net(\d+)$/) {
die "skip\n" if !$hotplug_features->{network};
vm_deviceunplug($vmid, $conf, $opt);
+ if($have_sdn) {
+ my $net = PVE::QemuServer::parse_net($conf->{$opt});
+ PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name});
+ }
} elsif (is_valid_drivename($opt)) {
die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
vm_deviceunplug($vmid, $conf, $opt);
vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
} elsif ($opt =~ m/^memory$/) {
die "skip\n" if !$hotplug_features->{memory};
- PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
+ PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf);
} elsif ($opt eq 'cpuunits') {
- cgroups_write("cpu", $vmid, "cpu.shares", $defaults->{cpuunits});
+ $cgroup->change_cpu_shares(undef);
} elsif ($opt eq 'cpulimit') {
- cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", -1);
+ $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
} else {
die "skip\n";
}
if (my $err = $@) {
&$add_error($opt, $err) if $err ne "skip\n";
} else {
- delete $conf->{$opt};
+ my $old = delete $conf->{$opt};
+ $cloudinit_record_changed->($conf, $opt, $old, undef);
PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
}
}
- my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
- $apply_pending_cloudinit = sub {
- return if $apply_pending_cloudinit_done; # once is enough
- $apply_pending_cloudinit_done = 1; # once is enough
-
- my ($key, $value) = @_;
-
- my @cloudinit_opts = keys %$confdesc_cloudinit;
- foreach my $opt (keys %{$conf->{pending}}) {
- next if !grep { $_ eq $opt } @cloudinit_opts;
- $conf->{$opt} = delete $conf->{pending}->{$opt};
- }
-
- my $new_conf = { %$conf };
- $new_conf->{$key} = $value;
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
- };
-
+ my $cloudinit_opt;
foreach my $opt (keys %{$conf->{pending}}) {
next if $selection && !$selection->{$opt};
my $value = $conf->{pending}->{$opt};
eval {
if ($opt eq 'hotplug') {
die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
+ die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/);
} elsif ($opt eq 'tablet') {
die "skip\n" if !$hotplug_features->{usb};
if ($value == 1) {
vm_deviceunplug($vmid, $conf, 'tablet');
vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
}
- } elsif ($opt =~ m/^usb\d+$/) {
- die "skip\n";
- # since we cannot reliably hot unplug usb devices
- # we are disabling it
- die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
- my $d = eval { PVE::JSONSchema::parse_property_string($usbdesc->{format}, $value) };
- die "skip\n" if !$d;
- qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
+ } elsif ($opt =~ m/^usb(\d+)$/) {
+ my $index = $1;
+ die "skip\n" if !$usb_hotplug;
+ my $d = eval { parse_property_string('pve-qm-usb', $value) };
+ my $id = $opt;
+ if ($d->{host} =~ m/^spice$/i) {
+ $id = "usbredirdev$index";
+ }
+ qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
} elsif ($opt eq 'vcpus') {
die "skip\n" if !$hotplug_features->{cpu};
qemu_cpu_hotplug($vmid, $conf, $value);
# allow manual ballooning if shares is set to zero
if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
- my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
+ my $memory = get_current_memory($conf->{memory});
+ my $balloon = $conf->{pending}->{balloon} || $memory;
mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
}
} elsif ($opt =~ m/^net(\d+)$/) {
vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
$vmid, $opt, $value, $arch, $machine_type);
} elsif (is_valid_drivename($opt)) {
- die "skip\n" if $opt eq 'efidisk0';
+ die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
# some changes can be done without hotplug
my $drive = parse_drive($opt, $value);
if (drive_is_cloudinit($drive)) {
- &$apply_pending_cloudinit($opt, $value);
+ $cloudinit_opt = [$opt, $drive];
+ # apply all the other changes first, then generate the cloudinit disk
+ die "skip\n";
}
vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
$vmid, $opt, $value, $arch, $machine_type);
} elsif ($opt =~ m/^memory$/) { #dimms
die "skip\n" if !$hotplug_features->{memory};
- $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
+ $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value);
} elsif ($opt eq 'cpuunits') {
- cgroups_write("cpu", $vmid, "cpu.shares", $conf->{pending}->{$opt});
+ my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
+ $cgroup->change_cpu_shares($new_cpuunits);
} elsif ($opt eq 'cpulimit') {
my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
- cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", $cpulimit);
+ $cgroup->change_cpu_quota($cpulimit, 100000);
+ } elsif ($opt eq 'agent') {
+ vmconfig_update_agent($conf, $opt, $value);
} else {
die "skip\n"; # skip non-hot-pluggable options
}
if (my $err = $@) {
&$add_error($opt, $err) if $err ne "skip\n";
} else {
+ $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
$conf->{$opt} = $value;
delete $conf->{pending}->{$opt};
}
}
+ if (defined($cloudinit_opt)) {
+ my ($opt, $drive) = @$cloudinit_opt;
+ my $value = $conf->{pending}->{$opt};
+ eval {
+ my $temp = {%$conf, $opt => $value};
+ PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
+ vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
+ $vmid, $opt, $value, $arch, $machine_type);
+ };
+ if (my $err = $@) {
+ &$add_error($opt, $err) if $err ne "skip\n";
+ } else {
+ $conf->{$opt} = $value;
+ delete $conf->{pending}->{$opt};
+ }
+ }
+
+ # unplug xhci controller if no usb device is left
+ if ($usb_hotplug) {
+ my $has_usb = 0;
+ for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
+ next if !defined($conf->{"usb$i"});
+ $has_usb = 1;
+ last;
+ }
+ if (!$has_usb) {
+ vm_deviceunplug($vmid, $conf, 'xhci');
+ }
+ }
+
PVE::QemuConfig->write_config($vmid, $conf);
+
+ if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
+ PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
+ }
}
sub try_deallocate_drive {
}
}
- return undef;
+ return;
}
sub vmconfig_delete_or_detach_drive {
sub vmconfig_apply_pending {
- my ($vmid, $conf, $storecfg, $errors) = @_;
+ my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;
+
+ return if !scalar(keys %{$conf->{pending}});
my $add_apply_error = sub {
my ($opt, $msg) = @_;
die "internal error";
} elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) {
vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
+ } elsif (defined($conf->{$opt}) && $opt =~ m/^net\d+$/) {
+ if($have_sdn) {
+ my $net = PVE::QemuServer::parse_net($conf->{$opt});
+ eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) };
+ warn if $@;
+ }
}
};
if (my $err = $@) {
PVE::QemuConfig->cleanup_pending($conf);
+ my $generate_cloudinit = $skip_cloud_init ? 0 : undef;
+
foreach my $opt (keys %{$conf->{pending}}) { # add/change
next if $opt eq 'delete'; # just to be sure
eval {
if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
+ } elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/) {
+ return if !$have_sdn; # return from eval if SDN is not available
+
+ my $new_net = PVE::QemuServer::parse_net($conf->{pending}->{$opt});
+ if ($conf->{$opt}) {
+ my $old_net = PVE::QemuServer::parse_net($conf->{$opt});
+
+ if (defined($old_net->{bridge}) && defined($old_net->{macaddr}) && (
+ safe_string_ne($old_net->{bridge}, $new_net->{bridge}) ||
+ safe_string_ne($old_net->{macaddr}, $new_net->{macaddr})
+ )) {
+ PVE::Network::SDN::Vnets::del_ips_from_mac($old_net->{bridge}, $old_net->{macaddr}, $conf->{name});
+ }
+ }
+ #fixme: reuse ip if mac change && same bridge
+ PVE::Network::SDN::Vnets::add_next_free_cidr($new_net->{bridge}, $conf->{name}, $new_net->{macaddr}, $vmid, undef, 1);
}
};
if (my $err = $@) {
$add_apply_error->($opt, $err);
} else {
+
+ if (is_valid_drivename($opt)) {
+ my $drive = parse_drive($opt, $conf->{pending}->{$opt});
+ $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
+ }
+
$conf->{$opt} = delete $conf->{pending}->{$opt};
}
}
# write all changes at once to avoid unnecessary i/o
PVE::QemuConfig->write_config($vmid, $conf);
+ if ($generate_cloudinit) {
+ if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
+ # After successful generation and if there were changes to be applied, update the
+ # config to drop the {cloudinit} entry.
+ PVE::QemuConfig->write_config($vmid, $conf);
+ }
+ }
}
sub vmconfig_update_net {
if (safe_string_ne($oldnet->{model}, $newnet->{model}) ||
safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) ||
safe_num_ne($oldnet->{queues}, $newnet->{queues}) ||
- !($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change
+ safe_num_ne($oldnet->{mtu}, $newnet->{mtu}) ||
+ !($newnet->{bridge} && $oldnet->{bridge})
+ ) { # bridge/nat mode change
# for non online change, we try to hot-unplug
die "skip\n" if !$hotplug;
vm_deviceunplug($vmid, $conf, $opt);
+
+ if ($have_sdn) {
+ PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
+ }
+
} else {
die "internal error" if $opt !~ m/net(\d+)/;
if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
safe_num_ne($oldnet->{tag}, $newnet->{tag}) ||
safe_string_ne($oldnet->{trunks}, $newnet->{trunks}) ||
- safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) {
+ safe_num_ne($oldnet->{firewall}, $newnet->{firewall})
+ ) {
PVE::Network::tap_unplug($iface);
+ #set link_down in guest if bridge or vlan change to notify guest (dhcp renew for example)
+ if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
+ safe_num_ne($oldnet->{tag}, $newnet->{tag})
+ ) {
+ qemu_set_link_status($vmid, $opt, 0);
+ }
+
+ if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge})) {
+ if ($have_sdn) {
+ PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
+ PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
+ }
+ }
+
if ($have_sdn) {
PVE::Network::SDN::Zones::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
} else {
PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
}
+
+ #set link_up in guest if bridge or vlan change to notify guest (dhcp renew for example)
+ if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
+ safe_num_ne($oldnet->{tag}, $newnet->{tag})
+ ) {
+ qemu_set_link_status($vmid, $opt, 1);
+ }
+
} elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
# Rate can be applied on its own but any change above needs to
# include the rate in tap_plug since OVS resets everything.
}
if ($hotplug) {
+ if ($have_sdn) {
+ PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
+ PVE::Network::SDN::Vnets::add_dhcp_mapping($newnet->{bridge}, $newnet->{macaddr}, $vmid, $conf->{name});
+ }
vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
} else {
die "skip\n";
}
}
-sub vmconfig_update_disk {
- my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;
+sub vmconfig_update_agent { # dies with "skip\n" unless old and new agent settings differ only in $hotplug_options
+ my ($conf, $opt, $value) = @_; # $conf: current config, $opt: config key holding the agent string, $value: pending value
- my $drive = parse_drive($opt, $value);
+ die "skip\n" if !$conf->{$opt}; # the current config has no (true) value for $opt
- if ($conf->{$opt}) {
+ my $hotplug_options = { fstrim_cloned_disks => 1 }; # sub-options that may differ without causing a skip
- if (my $old_drive = parse_drive($opt, $conf->{$opt})) {
+ my $old_agent = parse_guest_agent($conf); # settings parsed from the current config
+ my $agent = parse_guest_agent({$opt => $value}); # settings parsed from the pending value only
- my $media = $drive->{media} || 'disk';
- my $oldmedia = $old_drive->{media} || 'disk';
- die "unable to change media type\n" if $media ne $oldmedia;
+ for my $option (keys %$agent) { # added/changed options
+ next if defined($hotplug_options->{$option});
+ die "skip\n" if safe_string_ne($agent->{$option}, $old_agent->{$option}); # differs in a non-hotpluggable option
+ }
- if (!drive_is_cdrom($old_drive)) {
+ for my $option (keys %$old_agent) { # removed options
+ next if defined($hotplug_options->{$option});
+ die "skip\n" if safe_string_ne($old_agent->{$option}, $agent->{$option}); # second pass catches keys present only in the old settings
+ }
- if ($drive->{file} ne $old_drive->{file}) {
+ return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
+}
- die "skip\n" if !$hotplug;
+sub vmconfig_update_disk {
+ my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;
- # unplug and register as unused
- vm_deviceunplug($vmid, $conf, $opt);
- vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
+ my $drive = parse_drive($opt, $value);
- } else {
- # update existing disk
-
- # skip non hotpluggable value
- if (safe_string_ne($drive->{discard}, $old_drive->{discard}) ||
- safe_string_ne($drive->{iothread}, $old_drive->{iothread}) ||
- safe_string_ne($drive->{queues}, $old_drive->{queues}) ||
- safe_string_ne($drive->{cache}, $old_drive->{cache}) ||
- safe_string_ne($drive->{ssd}, $old_drive->{ssd})) {
- die "skip\n";
- }
+ if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
+ my $media = $drive->{media} || 'disk';
+ my $oldmedia = $old_drive->{media} || 'disk';
+ die "unable to change media type\n" if $media ne $oldmedia;
- # apply throttle
- if (safe_num_ne($drive->{mbps}, $old_drive->{mbps}) ||
- safe_num_ne($drive->{mbps_rd}, $old_drive->{mbps_rd}) ||
- safe_num_ne($drive->{mbps_wr}, $old_drive->{mbps_wr}) ||
- safe_num_ne($drive->{iops}, $old_drive->{iops}) ||
- safe_num_ne($drive->{iops_rd}, $old_drive->{iops_rd}) ||
- safe_num_ne($drive->{iops_wr}, $old_drive->{iops_wr}) ||
- safe_num_ne($drive->{mbps_max}, $old_drive->{mbps_max}) ||
- safe_num_ne($drive->{mbps_rd_max}, $old_drive->{mbps_rd_max}) ||
- safe_num_ne($drive->{mbps_wr_max}, $old_drive->{mbps_wr_max}) ||
- safe_num_ne($drive->{iops_max}, $old_drive->{iops_max}) ||
- safe_num_ne($drive->{iops_rd_max}, $old_drive->{iops_rd_max}) ||
- safe_num_ne($drive->{iops_wr_max}, $old_drive->{iops_wr_max}) ||
- safe_num_ne($drive->{bps_max_length}, $old_drive->{bps_max_length}) ||
- safe_num_ne($drive->{bps_rd_max_length}, $old_drive->{bps_rd_max_length}) ||
- safe_num_ne($drive->{bps_wr_max_length}, $old_drive->{bps_wr_max_length}) ||
- safe_num_ne($drive->{iops_max_length}, $old_drive->{iops_max_length}) ||
- safe_num_ne($drive->{iops_rd_max_length}, $old_drive->{iops_rd_max_length}) ||
- safe_num_ne($drive->{iops_wr_max_length}, $old_drive->{iops_wr_max_length})) {
-
- qemu_block_set_io_throttle($vmid,"drive-$opt",
- ($drive->{mbps} || 0)*1024*1024,
- ($drive->{mbps_rd} || 0)*1024*1024,
- ($drive->{mbps_wr} || 0)*1024*1024,
- $drive->{iops} || 0,
- $drive->{iops_rd} || 0,
- $drive->{iops_wr} || 0,
- ($drive->{mbps_max} || 0)*1024*1024,
- ($drive->{mbps_rd_max} || 0)*1024*1024,
- ($drive->{mbps_wr_max} || 0)*1024*1024,
- $drive->{iops_max} || 0,
- $drive->{iops_rd_max} || 0,
- $drive->{iops_wr_max} || 0,
- $drive->{bps_max_length} || 1,
- $drive->{bps_rd_max_length} || 1,
- $drive->{bps_wr_max_length} || 1,
- $drive->{iops_max_length} || 1,
- $drive->{iops_rd_max_length} || 1,
- $drive->{iops_wr_max_length} || 1);
+ if (!drive_is_cdrom($old_drive)) {
- }
+ if ($drive->{file} ne $old_drive->{file}) {
- return 1;
- }
+ die "skip\n" if !$hotplug;
- } else { # cdrom
+ # unplug and register as unused
+ vm_deviceunplug($vmid, $conf, $opt);
+ vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
- if ($drive->{file} eq 'none') {
- mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
- if (drive_is_cloudinit($old_drive)) {
- vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
- }
- } else {
- my $path = get_iso_path($storecfg, $vmid, $drive->{file});
+ } else {
+ # update existing disk
+
+ # skip non hotpluggable value
+ if (safe_string_ne($drive->{aio}, $old_drive->{aio}) ||
+ safe_string_ne($drive->{discard}, $old_drive->{discard}) ||
+ safe_string_ne($drive->{iothread}, $old_drive->{iothread}) ||
+ safe_string_ne($drive->{queues}, $old_drive->{queues}) ||
+ safe_string_ne($drive->{product}, $old_drive->{product}) ||
+ safe_string_ne($drive->{cache}, $old_drive->{cache}) ||
+ safe_string_ne($drive->{ssd}, $old_drive->{ssd}) ||
+ safe_string_ne($drive->{vendor}, $old_drive->{vendor}) ||
+ safe_string_ne($drive->{ro}, $old_drive->{ro})) {
+ die "skip\n";
+ }
- # force eject if locked
- mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
+ # apply throttle
+ if (safe_num_ne($drive->{mbps}, $old_drive->{mbps}) ||
+ safe_num_ne($drive->{mbps_rd}, $old_drive->{mbps_rd}) ||
+ safe_num_ne($drive->{mbps_wr}, $old_drive->{mbps_wr}) ||
+ safe_num_ne($drive->{iops}, $old_drive->{iops}) ||
+ safe_num_ne($drive->{iops_rd}, $old_drive->{iops_rd}) ||
+ safe_num_ne($drive->{iops_wr}, $old_drive->{iops_wr}) ||
+ safe_num_ne($drive->{mbps_max}, $old_drive->{mbps_max}) ||
+ safe_num_ne($drive->{mbps_rd_max}, $old_drive->{mbps_rd_max}) ||
+ safe_num_ne($drive->{mbps_wr_max}, $old_drive->{mbps_wr_max}) ||
+ safe_num_ne($drive->{iops_max}, $old_drive->{iops_max}) ||
+ safe_num_ne($drive->{iops_rd_max}, $old_drive->{iops_rd_max}) ||
+ safe_num_ne($drive->{iops_wr_max}, $old_drive->{iops_wr_max}) ||
+ safe_num_ne($drive->{bps_max_length}, $old_drive->{bps_max_length}) ||
+ safe_num_ne($drive->{bps_rd_max_length}, $old_drive->{bps_rd_max_length}) ||
+ safe_num_ne($drive->{bps_wr_max_length}, $old_drive->{bps_wr_max_length}) ||
+ safe_num_ne($drive->{iops_max_length}, $old_drive->{iops_max_length}) ||
+ safe_num_ne($drive->{iops_rd_max_length}, $old_drive->{iops_rd_max_length}) ||
+ safe_num_ne($drive->{iops_wr_max_length}, $old_drive->{iops_wr_max_length})) {
+
+ qemu_block_set_io_throttle(
+ $vmid,"drive-$opt",
+ ($drive->{mbps} || 0)*1024*1024,
+ ($drive->{mbps_rd} || 0)*1024*1024,
+ ($drive->{mbps_wr} || 0)*1024*1024,
+ $drive->{iops} || 0,
+ $drive->{iops_rd} || 0,
+ $drive->{iops_wr} || 0,
+ ($drive->{mbps_max} || 0)*1024*1024,
+ ($drive->{mbps_rd_max} || 0)*1024*1024,
+ ($drive->{mbps_wr_max} || 0)*1024*1024,
+ $drive->{iops_max} || 0,
+ $drive->{iops_rd_max} || 0,
+ $drive->{iops_wr_max} || 0,
+ $drive->{bps_max_length} || 1,
+ $drive->{bps_rd_max_length} || 1,
+ $drive->{bps_wr_max_length} || 1,
+ $drive->{iops_max_length} || 1,
+ $drive->{iops_rd_max_length} || 1,
+ $drive->{iops_wr_max_length} || 1,
+ );
- if ($path) {
- mon_cmd($vmid, "blockdev-change-medium",
- id => "$opt", filename => "$path");
- }
}
return 1;
}
+
+ } else { # cdrom
+
+ if ($drive->{file} eq 'none') {
+ mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
+ if (drive_is_cloudinit($old_drive)) {
+ vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
+ }
+ } else {
+ my $path = get_iso_path($storecfg, $vmid, $drive->{file});
+
+ # force eject if locked
+ mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
+
+ if ($path) {
+ mon_cmd($vmid, "blockdev-change-medium",
+ id => "$opt", filename => "$path");
+ }
+ }
+
+ return 1;
}
}
vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
+# Re-apply the cloud-init configuration of a guest and, if the guest is running,
+# swap the medium of its cloud-init drive.
+#
+# $storecfg - storage configuration, used to resolve the path of the cloud-init volume
+# $conf     - guest configuration; written back when apply_cloudinit_config returns true
+# $vmid     - ID of the guest
+#
+# Does nothing when the configuration contains no cloud-init drive. If more than one
+# volume qualifies, the last one visited by foreach_volume is used.
+sub vmconfig_update_cloudinit_drive {
+    my ($storecfg, $conf, $vmid) = @_;
+
+    my ($ci_key, $ci_drive);
+    PVE::QemuConfig->foreach_volume($conf, sub {
+        my ($key, $candidate) = @_;
+        ($ci_key, $ci_drive) = ($key, $candidate)
+            if PVE::QemuServer::drive_is_cloudinit($candidate);
+    });
+    return if !$ci_drive;
+
+    # persist the config only if applying the cloud-init config reported success
+    PVE::QemuConfig->write_config($vmid, $conf)
+        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);
+
+    if (PVE::QemuServer::check_running($vmid)) {
+        my $iso_path = PVE::Storage::path($storecfg, $ci_drive->{file});
+        if ($iso_path) {
+            my $device = "$ci_key";
+            # force-eject first, then insert the volume as new medium
+            mon_cmd($vmid, "eject", force => JSON::true, id => $device);
+            mon_cmd($vmid, "blockdev-change-medium", id => $device, filename => "$iso_path");
+        }
+    }
+}
+
# called in locked context by incoming migration
sub vm_migrate_get_nbd_disks {
my ($storecfg, $conf, $replicated_volumes) = @_;
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive);
+ return if $ds eq 'tpmstate0';
my $volid = $drive->{file};
my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
return if $scfg->{shared};
+ my $format = qemu_img_format($scfg, $volname);
+
# replicated disks re-use existing state via bitmap
my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;
- $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
+ $local_volumes->{$ds} = [$volid, $storeid, $drive, $use_existing, $format];
});
return $local_volumes;
}
sub vm_migrate_alloc_nbd_disks {
my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;
- my $format = undef;
-
my $nbd = {};
foreach my $opt (sort keys %$source_volumes) {
- my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}};
+ my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};
if ($use_existing) {
$nbd->{$opt}->{drivestr} = print_drive($drive);
next;
}
- # If a remote storage is specified and the format of the original
- # volume is not available there, fall back to the default format.
- # Otherwise use the same format as the original.
- if (!$storagemap->{identity}) {
- $storeid = map_storage($storagemap, $storeid);
- my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- my $fileFormat = qemu_img_format($scfg, $volname);
- $format = (grep {$fileFormat eq $_} @{$validFormats}) ? $fileFormat : $defFormat;
- } else {
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
- }
+ $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
+
+ # order of precedence, filtered by whether storage supports it:
+ # 1. explicit requested format
+ # 2. default format of storage
+ my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
+ $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*;
- my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, ($drive->{size}/1024));
+ my $size = $drive->{size} / 1024;
+ my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);
my $newdrive = $drive;
$newdrive->{format} = $format;
$newdrive->{file} = $newvolid;
return PVE::QemuConfig->lock_config($vmid, sub {
my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});
- die "you can't start a vm if it's a template\n" if PVE::QemuConfig->is_template($conf);
+ die "you can't start a vm if it's a template\n"
+ if !$params->{skiptemplate} && PVE::QemuConfig->is_template($conf);
+
+ my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
+ my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');
- $params->{resume} = PVE::QemuConfig->has_lock($conf, 'suspended');
+ my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});
+
+ if ($has_backup_lock && $running) {
+ # a backup is currently running, attempt to start the guest in the
+ # existing QEMU instance
+ return vm_resume($vmid);
+ }
PVE::QemuConfig->check_lock($conf)
- if !($params->{skiplock} || $params->{resume});
+ if !($params->{skiplock} || $has_suspended_lock);
+
+ $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});
- die "VM $vmid already running\n" if check_running($vmid, undef, $migrate_opts->{migratedfrom});
+ die "VM $vmid already running\n" if $running;
if (my $storagemap = $migrate_opts->{storagemap}) {
my $replicated = $migrate_opts->{replicated_volumes};
# params:
# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
# skiplock => 0/1, skip checking for config lock
-# forcemachine => to force Qemu machine (rollback/migration)
+# skiptemplate => 0/1, skip checking whether VM is template
+# forcemachine => to force QEMU machine (rollback/migration)
# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
# timeout => in seconds
# paused => start VM in paused state (backup)
# resume => resume from hibernation
+# live-restore-backing => {
+# sata0 => {
+# name => blockdev-name,
+# blockdev => "arg to the -blockdev command instantiating device named 'name'",
+# },
+# virtio2 => ...
+# }
# migrate_opts:
# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
# migratedfrom => source node
# network => CIDR of migration network
# type => secure/insecure - tunnel over encrypted connection or plain-text
# nbd_proto_version => int, 0 for TCP, 1 for UNIX
-# replicated_volumes = which volids should be re-used with bitmaps for nbd migration
+# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
+# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
+# contained in config
sub vm_start_nolock {
my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
$conf = PVE::QemuConfig->load_config($vmid); # update/reload
}
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
+ # don't regenerate the ISO if the VM is started as part of a live migration
+ # this way we can reuse the old ISO with the correct config
+ if (!$migratedfrom) {
+ if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
+ # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
+ # $conf->{cloudinit}, so we could just not do this?
+ # But we do it above, so for now let's be consistent.
+ $conf = PVE::QemuConfig->load_config($vmid); # update/reload
+ }
+ }
+
+ # override offline migrated volumes, conf is out of date still
+ if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
+ for my $key (sort keys $offline_volumes->%*) {
+ my $parsed = parse_drive($key, $conf->{$key});
+ $parsed->{file} = $offline_volumes->{$key};
+ $conf->{$key} = print_drive($parsed);
+ }
+ }
my $defaults = load_defaults();
# set environment variable useful inside network script
- $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;
+ # for remote migration the config is available on the target node!
+ if (!$migrate_opts->{remote_node}) {
+ $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
+ }
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
print "Resuming suspended VM\n";
}
- my ($cmd, $vollist, $spice_port) =
- config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);
+ my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
+ $conf, $defaults, $forcemachine, $forcecpu, $params->{'live-restore-backing'});
my $migration_ip;
my $get_migration_ip = sub {
return $migration_ip;
};
- my $migrate_uri;
if ($statefile) {
if ($statefile eq 'tcp') {
- my $localip = "localhost";
+ my $migrate = $res->{migrate} = { proto => 'tcp' };
+ $migrate->{addr} = "localhost";
my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
my $nodename = nodename();
}
if ($migration_type eq 'insecure') {
- $localip = $get_migration_ip->($nodename);
- $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
+ $migrate->{addr} = $get_migration_ip->($nodename);
+ $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
}
- my $pfamily = PVE::Tools::get_host_address_family($nodename);
- my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
- $migrate_uri = "tcp:${localip}:${migrate_port}";
- push @$cmd, '-incoming', $migrate_uri;
+ # see #4501: port reservation should be done close to usage - tell QEMU where to listen
+ # via QMP later
+ push @$cmd, '-incoming', 'defer';
push @$cmd, '-S';
} elsif ($statefile eq 'unix') {
# should be default for secure migrations as a ssh TCP forward
# tunnel is not deterministic reliable ready and fails regurarly
# to set up in time, so use UNIX socket forwards
- my $socket_addr = "/run/qemu-server/$vmid.migrate";
- unlink $socket_addr;
-
- $migrate_uri = "unix:$socket_addr";
+ my $migrate = $res->{migrate} = { proto => 'unix' };
+ $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
+ unlink $migrate->{addr};
- push @$cmd, '-incoming', $migrate_uri;
+ $migrate->{uri} = "unix:$migrate->{addr}";
+ push @$cmd, '-incoming', $migrate->{uri};
push @$cmd, '-S';
} elsif (-e $statefile) {
push @$cmd, '-S';
}
- # host pci devices
- for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
- my $d = parse_hostpci($conf->{"hostpci$i"});
- next if !$d;
- my $pcidevices = $d->{pciid};
- foreach my $pcidevice (@$pcidevices) {
- my $pciid = $pcidevice->{id};
+ my $memory = get_current_memory($conf->{memory});
+ my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume);
- my $info = PVE::SysFSTools::pci_device_info("$pciid");
- die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support();
- die "no pci device info for device '$pciid'\n" if !$info;
+ my $pci_reserve_list = [];
+ for my $device (values $pci_devices->%*) {
+ next if $device->{mdev}; # we don't reserve for mdev devices
+ push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
+ }
- if ($d->{mdev}) {
- my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i);
- PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $d->{mdev});
- } else {
- die "can't unbind/bind pci group to vfio '$pciid'\n"
- if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid);
- die "can't reset pci device '$pciid'\n"
- if $info->{has_fl_reset} and !PVE::SysFSTools::pci_dev_reset($info);
+ # reserve all PCI IDs before actually doing anything with them
+ PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
+
+ eval {
+ my $uuid;
+ for my $id (sort keys %$pci_devices) {
+ my $d = $pci_devices->{$id};
+ my ($index) = ($id =~ m/^hostpci(\d+)$/);
+
+ my $chosen_mdev;
+ for my $dev ($d->{ids}->@*) {
+ my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
+ if ($d->{mdev}) {
+ warn $@ if $@;
+ $chosen_mdev = $info;
+ last if $chosen_mdev; # if successful, we're done
+ } else {
+ die $@ if $@;
+ }
}
- }
+
+ next if !$d->{mdev};
+ die "could not create mediated device\n" if !defined($chosen_mdev);
+
+ # nvidia grid needs the uuid of the mdev as qemu parameter
+ if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
+ if (defined($conf->{smbios1})) {
+ my $smbios_conf = parse_smbios1($conf->{smbios1});
+ $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
+ }
+ $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
+ }
+ }
+ push @$cmd, '-uuid', $uuid if defined($uuid);
+ };
+ if (my $err = $@) {
+ eval { cleanup_pci_devices($vmid, $conf) };
+ warn $@ if $@;
+ die $err;
}
PVE::Storage::activate_volumes($storecfg, $vollist);
- eval {
- run_command(['/bin/systemctl', 'stop', "$vmid.scope"],
- outfunc => sub {}, errfunc => sub {});
- };
+
+ my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
+ eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
+ eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
# Issues with the above 'stop' not being fully completed are extremely rare, a very low
# timeout should be more than enough here...
- PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);
+ PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
- my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits}
- : $defaults->{cpuunits};
+ my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
- my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
my %run_params = (
timeout => $statefile ? undef : $start_timeout,
umask => 0077,
$run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
}
- my %properties = (
+ my %systemd_properties = (
Slice => 'qemu.slice',
- KillMode => 'none',
- CPUShares => $cpuunits
+ KillMode => 'process',
+ SendSIGKILL => 0,
+ TimeoutStopUSec => ULONG_MAX, # infinity
);
+ if (PVE::CGroup::cgroup_mode() == 2) {
+ $systemd_properties{CPUWeight} = $cpuunits;
+ } else {
+ $systemd_properties{CPUShares} = $cpuunits;
+ }
+
if (my $cpulimit = $conf->{cpulimit}) {
- $properties{CPUQuota} = int($cpulimit * 100);
+ $systemd_properties{CPUQuota} = int($cpulimit * 100);
}
- $properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
+ $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
my $run_qemu = sub {
PVE::Tools::run_fork sub {
- PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties);
+ PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
+
+ my $tpmpid;
+ if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
+ # start the TPM emulator so QEMU can connect on start
+ $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
+ }
my $exitcode = run_command($cmd, %run_params);
- die "QEMU exited with code $exitcode\n" if $exitcode;
+ if ($exitcode) {
+ if ($tpmpid) {
+ warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
+ kill 'TERM', $tpmpid;
+ }
+ die "QEMU exited with code $exitcode\n";
+ }
};
};
if ($conf->{hugepages}) {
my $code = sub {
- my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
+ my $hotplug_features =
+ parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+ my $hugepages_topology =
+ PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
+
my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
PVE::QemuServer::Memory::hugepages_mount();
eval { $run_qemu->() };
if (my $err = $@) {
- PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology);
+ PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
+ if !$conf->{keephugepages};
die $err;
}
- PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology);
+ PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
+ if !$conf->{keephugepages};
};
eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
if (my $err = $@) {
# deactivate volumes if start fails
eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
+ warn $@ if $@;
+ eval { cleanup_pci_devices($vmid, $conf) };
+ warn $@ if $@;
+
die "start failed: $err";
}
- print "migration listens on $migrate_uri\n" if $migrate_uri;
- $res->{migrate_uri} = $migrate_uri;
+ # re-reserve all PCI IDs now that we can know the actual VM PID
+ my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
+ eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
+ warn $@ if $@;
- if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
+ if (defined(my $migrate = $res->{migrate})) {
+ if ($migrate->{proto} eq 'tcp') {
+ my $nodename = nodename();
+ my $pfamily = PVE::Tools::get_host_address_family($nodename);
+ $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
+ $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
+ mon_cmd($vmid, "migrate-incoming", uri => $migrate->{uri});
+ }
+ print "migration listens on $migrate->{uri}\n";
+ } elsif ($statefile) {
eval { mon_cmd($vmid, "cont"); };
warn $@ if $@;
}
my $migrate_storage_uri;
# nbd_protocol_version > 0 for unix socket support
- if ($nbd_protocol_version > 0 && $migration_type eq 'secure') {
+ if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
$migrate_storage_uri = "nbd:unix:$socket_path";
+ $res->{migrate}->{unix_sockets} = [$socket_path];
} else {
my $nodename = nodename();
my $localip = $get_migration_ip->($nodename);
my $pfamily = PVE::Tools::get_host_address_family($nodename);
my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
- mon_cmd($vmid, "nbd-server-start", addr => { type => 'inet', data => { host => "${localip}", port => "${storage_migrate_port}" } } );
+ mon_cmd($vmid, "nbd-server-start", addr => {
+ type => 'inet',
+ data => {
+ host => "${localip}",
+ port => "${storage_migrate_port}",
+ },
+ });
$localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
$migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
}
- $res->{migrate_storage_uri} = $migrate_storage_uri;
+ my $block_info = mon_cmd($vmid, "query-block");
+ $block_info = { map { $_->{device} => $_ } $block_info->@* };
foreach my $opt (sort keys %$nbd) {
my $drivestr = $nbd->{$opt}->{drivestr};
my $volid = $nbd->{$opt}->{volid};
- mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
+
+ my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
+
+ mon_cmd(
+ $vmid,
+ "block-export-add",
+ id => "drive-$opt",
+ 'node-name' => $block_node,
+ writable => JSON::true,
+ type => "nbd",
+ name => "drive-$opt", # NBD export name
+ );
+
my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
print "storage migration listens on $nbd_uri volume:$drivestr\n";
print "re-using replicated volume: $opt - $volid\n"
print "spice listens on port $spice_port\n";
$res->{spice_port} = $spice_port;
if ($migrate_opts->{spice_ticket}) {
- mon_cmd($vmid, "set_password", protocol => 'spice', password => $migrate_opts->{spice_ticket});
+ mon_cmd($vmid, "set_password", protocol => 'spice', password =>
+ $migrate_opts->{spice_ticket});
mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
}
}
my $nicconf = parse_net($conf->{$opt});
qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
}
+ add_nets_bridge_fdb($conf, $vmid);
}
- mon_cmd($vmid, 'qom-set',
+ if (!defined($conf->{balloon}) || $conf->{balloon}) {
+ eval {
+ mon_cmd(
+ $vmid,
+ 'qom-set',
path => "machine/peripheral/balloon0",
property => "guest-stats-polling-interval",
- value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});
+ value => 2
+ );
+ };
+ log_warn("could not set polling interval for ballooning - $@") if $@;
+ }
if ($resume) {
print "Resumed VM, removing state\n";
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
+ my ($current_machine, $is_deprecated) =
+ PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
+ if ($is_deprecated) {
+ log_warn(
+ "current machine version '$current_machine' is deprecated - see the documentation and ".
+ "change to a newer one",
+ );
+ }
+
return $res;
}
my ($storecfg, $vmid, $snapname) = @_;
my $conf = PVE::QemuConfig->load_config($vmid);
- my $forcemachine;
- my $forcecpu;
+ my ($forcemachine, $forcecpu);
if ($snapname) {
my $snapshot = $conf->{snapshots}->{$snapname};
die "snapshot '$snapname' does not exist\n" if !defined($snapshot);
my $defaults = load_defaults();
- my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults,
- $forcemachine, $forcecpu);
+ my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);
return PVE::Tools::cmd2string($cmd);
}
return $vollist;
}
+# Tear down the host PCI passthrough state of a VM: for every 'hostpciN' entry that has
+# 'mdev' set, remove the mediated device if its sysfs directory still exists, then drop
+# the PCI reservation held for $vmid.
+sub cleanup_pci_devices {
+    my ($vmid, $conf) = @_;
+
+    for my $opt (keys $conf->%*) {
+        my ($index) = ($opt =~ m/^hostpci(\d+)$/);
+        next if !defined($index);
+
+        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
+        my $device = parse_hostpci($conf->{$opt});
+        next if !$device->{mdev};
+
+        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
+        # don't want to break ABI just for this two liner
+        my $mdev_dir = "/sys/bus/mdev/devices/$uuid";
+
+        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
+        # out when we do it first. so wait for up to 10 seconds and then try it manually
+        if ($device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/ && -e $mdev_dir) {
+            my $waited = 0;
+            while (-e $mdev_dir && $waited < 10) {
+                sleep 1;
+                $waited += 1;
+            }
+            print "waited $waited seconds for mediated device driver finishing clean up\n";
+        }
+
+        # nothing left to do if the driver already removed the device
+        next if !-e $mdev_dir;
+
+        print "actively clean up mediated device with UUID $uuid\n";
+        PVE::SysFSTools::file_write("$mdev_dir/remove", "1");
+    }
+
+    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
+}
+
sub vm_stop_cleanup {
my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;
if (!$keepActive) {
my $vollist = get_vm_volumes($conf);
PVE::Storage::deactivate_volumes($storecfg, $vollist);
+
+ if (my $tpmdrive = $conf->{tpmstate0}) {
+ my $tpm = parse_drive("tpmstate0", $tpmdrive);
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+ if ($storeid) {
+ PVE::Storage::unmap_volume($storecfg, $tpm->{file});
+ }
+ }
}
foreach my $ext (qw(mon qmp pid vnc qga)) {
}
if ($conf->{ivshmem}) {
- my $ivshmem = PVE::JSONSchema::parse_property_string($ivshmem_fmt, $conf->{ivshmem});
+ my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
# just delete it for now, VMs which have this already open do not
# are affected, but new VMs will get a separated one. If this
# becomes an issue we either add some sort of ref-counting or just
unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
}
- foreach my $key (keys %$conf) {
- next if $key !~ m/^hostpci(\d+)$/;
- my $hostpciindex = $1;
- my $d = parse_hostpci($conf->{$key});
- my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);
-
- foreach my $pci (@{$d->{pciid}}) {
- my $pciid = $pci->{id};
- PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
- }
- }
+ cleanup_pci_devices($vmid, $conf);
vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
};
eval {
if ($shutdown) {
- if (defined($conf) && parse_guest_agent($conf)->{enabled}) {
+ if (defined($conf) && get_qga_key($conf, 'enabled')) {
mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
} else {
mon_cmd($vmid, "system_powerdown");
}
- $vmstate = PVE::QemuConfig->__snapshot_save_vmstate($vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
+ $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
+ $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
$path = PVE::Storage::path($storecfg, $vmstate);
PVE::QemuConfig->write_config($vmid, $conf);
} else {
PVE::Storage::activate_volumes($storecfg, [$vmstate]);
eval {
+ set_migration_caps($vmid, 1);
mon_cmd($vmid, "savevm-start", statefile => $path);
for(;;) {
my $state = mon_cmd($vmid, "query-savevm");
}
}
+# $nocheck is set when called as part of a migration - in this context the
+# location of the config file (source or target node) is not deterministic,
+# since migration cannot wait for pmxcfs to process the rename
+#
+# Resume VM $vmid through QMP while holding the config lock:
+# - nothing is done if QEMU already reports status 'running'
+# - status 'suspended' is resumed with 'system_wakeup' instead of 'cont'
+# - status 'shutdown' triggers a 'system_reset' before resuming
+# Unless $nocheck is set, check_lock is called on the config; $skiplock or an
+# existing 'backup' lock skips that call.
sub vm_resume {
my ($vmid, $skiplock, $nocheck) = @_;
PVE::QemuConfig->lock_config($vmid, sub {
my $res = mon_cmd($vmid, 'query-status');
my $resume_cmd = 'cont';
+ my $reset = 0;
+ my $conf;
+ if ($nocheck) {
+ # the config may live on either node right now, so probe: local node, then the
+ # node recorded in the cluster VM list, then the local node once more
+ $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
+ if ($@) {
+ my $vmlist = PVE::Cluster::get_vmlist();
+ if (exists($vmlist->{ids}->{$vmid})) {
+ my $node = $vmlist->{ids}->{$vmid}->{node};
+ $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
+ }
+ if (!$conf) {
+ PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
+ $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
+ }
+ }
+ } else {
+ $conf = PVE::QemuConfig->load_config($vmid);
+ }
- if ($res->{status} && $res->{status} eq 'suspended') {
- $resume_cmd = 'system_wakeup';
+ if ($res->{status}) {
+ return if $res->{status} eq 'running'; # job done, go home
+ $resume_cmd = 'system_wakeup' if $res->{status} eq 'suspended';
+ $reset = 1 if $res->{status} eq 'shutdown';
}
if (!$nocheck) {
-
- my $conf = PVE::QemuConfig->load_config($vmid);
-
PVE::QemuConfig->check_lock($conf)
if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup'));
}
+ if ($reset) {
+ # required if a VM shuts down during a backup and we get a resume
+ # request before the backup finishes for example
+ mon_cmd($vmid, "system_reset");
+ }
+
+ # only done for a plain 'cont', not when waking up a suspended guest
+ add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';
+
mon_cmd($vmid, $resume_cmd);
});
}
});
}
+# Check the bridge, VLAN tag and trunks of every 'netN' entry in $conf against the
+# privileges of $authuser. root@pam passes without any check; for all other users the
+# decision is delegated to PVE::GuestHelpers::check_vnet_access (presumably dies on a
+# missing privilege - confirm there). Returns 1.
+sub check_bridge_access {
+    my ($rpcenv, $authuser, $conf) = @_;
+
+    if ($authuser ne 'root@pam') {
+        my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;
+        for my $opt (@net_opts) {
+            my $net = parse_net($conf->{$opt});
+            my ($bridge, $tag, $trunks) = $net->@{qw(bridge tag trunks)};
+            PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
+        }
+    }
+
+    return 1;
+}
+
+# Verify that $user may use every USB and host PCI passthrough entry in $conf.
+#
+# - 'usbN' with 'host' set: only root@pam, unless the host value is 'spice' (any case)
+# - 'hostpciN' with 'host' set: only root@pam
+# - entries with 'mapping' set: 'Mapping.Use' is checked on /mapping/usb/<mapping> or
+#   /mapping/pci/<mapping> through $rpcenv->check_full
+#
+# Dies on the first entry that is not allowed, or that has neither 'host' nor 'mapping'
+# set. Returns 1 otherwise, matching check_bridge_access, instead of leaking whatever
+# the loop evaluated last.
+sub check_mapping_access {
+    my ($rpcenv, $user, $conf) = @_;
+
+    for my $opt (keys $conf->%*) {
+        if ($opt =~ m/^usb\d+$/) {
+            my $device = parse_property_string('pve-qm-usb', $conf->{$opt});
+            if (my $host = $device->{host}) {
+                die "only root can set '$opt' config for real devices\n"
+                    if $host !~ m/^spice$/i && $user ne 'root@pam';
+            } elsif ($device->{mapping}) {
+                $rpcenv->check_full($user, "/mapping/usb/$device->{mapping}", ['Mapping.Use']);
+            } else {
+                die "either 'host' or 'mapping' must be set.\n";
+            }
+        } elsif ($opt =~ m/^hostpci\d+$/) {
+            my $device = parse_property_string('pve-qm-hostpci', $conf->{$opt});
+            if ($device->{host}) {
+                die "only root can set '$opt' config for non-mapped devices\n"
+                    if $user ne 'root@pam';
+            } elsif ($device->{mapping}) {
+                $rpcenv->check_full($user, "/mapping/pci/$device->{mapping}", ['Mapping.Use']);
+            } else {
+                die "either 'host' or 'mapping' must be set.\n";
+            }
+        }
+    }
+
+    return 1;
+}
+
+# Run all permission checks needed before a restored config may be written for $user:
+# first the network bridge checks, then the USB/PCI passthrough and mapping checks.
+sub check_restore_permissions {
+    my ($rpcenv, $user, $conf) = @_;
+
+    check_bridge_access($rpcenv, $user, $conf);
+
+    return check_mapping_access($rpcenv, $user, $conf);
+}
# vzdump restore implementaion
sub tar_archive_read_firstfile {
my $restore_cleanup_oldconf = sub {
my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;
+ my $kept_disks = {};
+
PVE::QemuConfig->foreach_volume($oldconf, sub {
my ($ds, $drive) = @_;
if (my $err = $@) {
warn $err;
}
+ } else {
+ $kept_disks->{$volid} = 1;
}
});
- # delete vmstate files, after the restore we have no snapshots anymore
- foreach my $snapname (keys %{$oldconf->{snapshots}}) {
+ # after the restore we have no snapshots anymore
+ for my $snapname (keys $oldconf->{snapshots}->%*) {
my $snap = $oldconf->{snapshots}->{$snapname};
if ($snap->{vmstate}) {
eval { PVE::Storage::vdisk_free($storecfg, $snap->{vmstate}); };
warn $err;
}
}
+
+ for my $volid (keys $kept_disks->%*) {
+ eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
+ warn $@ if $@;
+ }
}
};
my $parse_backup_hints = sub {
my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;
- my $virtdev_hash = {};
+ my $check_storage = sub { # assert if an image can be allocate
+ my ($storeid, $scfg) = @_;
+ die "Content type 'images' is not available on storage '$storeid'\n"
+ if !$scfg->{content}->{images};
+ $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
+ if $user ne 'root@pam';
+ };
+ my $virtdev_hash = {};
while (defined(my $line = <$fh>)) {
if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
$devinfo->{$devname}->{format} = $format;
$devinfo->{$devname}->{storeid} = $storeid;
- # check permission on storage
- my $pool = $options->{pool}; # todo: do we need that?
- if ($user ne 'root@pam') {
- $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
- }
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ $check_storage->($storeid, $scfg); # permission and content type check
$virtdev_hash->{$virtdev} = $devinfo->{$devname};
} elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
my $virtdev = $1;
my $drive = parse_drive($virtdev, $2);
+
if (drive_is_cloudinit($drive)) {
my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
$storeid = $options->{storage} if defined ($options->{storage});
my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback
+ $check_storage->($storeid, $scfg); # permission and content type check
+
$virtdev_hash->{$virtdev} = {
format => $format,
storeid => $storeid,
my $name;
if ($d->{is_cloudinit}) {
$name = "vm-$vmid-cloudinit";
- $name .= ".$d->{format}" if $d->{format} ne 'raw';
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ if ($scfg->{path}) {
+ $name .= ".$d->{format}";
+ }
}
- my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);
+ my $volid = PVE::Storage::vdisk_alloc(
+ $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);
print STDERR "new volume ID is '$volid'\n";
$d->{volid} = $volid;
return $map;
};
-my $restore_update_config_line = sub {
- my ($outfd, $cookie, $vmid, $map, $line, $unique) = @_;
+sub restore_update_config_line {
+ my ($cookie, $map, $line, $unique) = @_;
- return if $line =~ m/^\#qmdump\#/;
- return if $line =~ m/^\#vzdump\#/;
- return if $line =~ m/^lock:/;
- return if $line =~ m/^unused\d+:/;
- return if $line =~ m/^parent:/;
+ return '' if $line =~ m/^\#qmdump\#/;
+ return '' if $line =~ m/^\#vzdump\#/;
+ return '' if $line =~ m/^lock:/;
+ return '' if $line =~ m/^unused\d+:/;
+ return '' if $line =~ m/^parent:/;
+
+ my $res = '';
my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
if (($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/)) {
};
my $netstr = print_net($net);
- print $outfd "net$cookie->{netcount}: $netstr\n";
+ $res .= "net$cookie->{netcount}: $netstr\n";
$cookie->{netcount}++;
}
} elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
my $net = parse_net($netstr);
$net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
$netstr = print_net($net);
- print $outfd "$id: $netstr\n";
- } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk)\d+):\s*(\S+)\s*$/) {
+ $res .= "$id: $netstr\n";
+ } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
my $virtdev = $1;
my $value = $3;
my $di = parse_drive($virtdev, $value);
if (defined($di->{backup}) && !$di->{backup}) {
- print $outfd "#$line";
+ $res .= "#$line";
} elsif ($map->{$virtdev}) {
delete $di->{format}; # format can change on restore
$di->{file} = $map->{$virtdev};
$value = print_drive($di);
- print $outfd "$virtdev: $value\n";
+ $res .= "$virtdev: $value\n";
} else {
- print $outfd $line;
+ $res .= $line;
}
} elsif (($line =~ m/^vmgenid: (.*)/)) {
my $vmgenid = $1;
# always generate a new vmgenid if there was a valid one setup
$vmgenid = generate_uuid();
}
- print $outfd "vmgenid: $vmgenid\n";
+ $res .= "vmgenid: $vmgenid\n";
} elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
my ($uuid, $uuid_str);
UUID::generate($uuid);
UUID::unparse($uuid, $uuid_str);
my $smbios1 = parse_smbios1($2);
$smbios1->{uuid} = $uuid_str;
- print $outfd $1.print_smbios1($smbios1)."\n";
+ $res .= $1.print_smbios1($smbios1)."\n";
} else {
- print $outfd $line;
+ $res .= $line;
}
-};
+
+ return $res;
+}
+# Deactivate the volumes of all entries in $virtdev_hash that carry a 'volid'.
+# A failure of PVE::Storage::deactivate_volumes is printed to STDERR and not re-thrown.
my $restore_deactivate_volumes = sub {
- my ($storecfg, $devinfo) = @_;
+ my ($storecfg, $virtdev_hash) = @_;
my $vollist = [];
- foreach my $devname (keys %$devinfo) {
- my $volid = $devinfo->{$devname}->{volid};
- push @$vollist, $volid if $volid;
+ for my $dev (values $virtdev_hash->%*) {
+ push $vollist->@*, $dev->{volid} if $dev->{volid};
}
- PVE::Storage::deactivate_volumes($storecfg, $vollist);
+ eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
+ print STDERR $@ if $@;
};
+# Free the volume of every entry in $virtdev_hash that carries a 'volid'.
+# Each volume is handled in its own eval, so a failure is reported on STDERR and the
+# remaining volumes are still processed.
my $restore_destroy_volumes = sub {
- my ($storecfg, $devinfo) = @_;
+ my ($storecfg, $virtdev_hash) = @_;
- foreach my $devname (keys %$devinfo) {
- my $volid = $devinfo->{$devname}->{volid};
- next if !$volid;
+ for my $dev (values $virtdev_hash->%*) {
+ my $volid = $dev->{volid} or next;
eval {
- if ($volid =~ m|^/|) {
- unlink $volid || die 'unlink failed\n';
- } else {
- PVE::Storage::vdisk_free($storecfg, $volid);
- }
+ PVE::Storage::vdisk_free($storecfg, $volid);
print STDERR "temporary volume '$volid' sucessfuly removed\n";
};
print STDERR "unable to cleanup '$volid' - $@" if $@;
}
-};
+};
+
+# Parse the raw config text taken from the backup and overlay it with $override_conf;
+# for a key present in both, the override value replaces the one from the backup.
+# Returns the merged config hash.
+sub restore_merge_config {
+    my ($filename, $backup_conf_raw, $override_conf) = @_;
+
+    my $merged = parse_vm_config($filename, $backup_conf_raw);
+    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;
+
+    return $merged;
+}
sub scan_volids {
my ($cfg, $vmid) = @_;
- my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid);
+ my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');
my $volid_hash = {};
foreach my $storeid (keys %$info) {
my ($vmid, $conf, $volid_hash) = @_;
my $changes;
- my $prefix = "VM $vmid:";
+ my $prefix = "VM $vmid";
# used and unused disks
my $referenced = {};
my $volid = $drive->{file};
return if !$volid;
+ my $volume = $volid_hash->{$volid};
# mark volid as "in-use" for next step
$referenced->{$volid} = 1;
- if ($volid_hash->{$volid} &&
- (my $path = $volid_hash->{$volid}->{path})) {
+ if ($volume && (my $path = $volume->{path})) {
$referencedpath->{$path} = 1;
}
return if drive_is_cdrom($drive);
- return if !$volid_hash->{$volid};
+ return if !$volume;
- my ($updated, $old_size, $new_size) = PVE::QemuServer::Drive::update_disksize($drive, $volid_hash);
+ my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
if (defined($updated)) {
$changes = 1;
$conf->{$opt} = print_drive($updated);
- print "$prefix size of disk '$volid' ($opt) updated from $old_size to $new_size\n";
+ print "$prefix ($opt): $msg\n";
}
});
my $volid = $drive->{file};
return if !$volid;
- my $path = $volid_hash->{$volid}->{path} if $volid_hash->{$volid};
+ my $path;
+ $path = $volid_hash->{$volid}->{path} if $volid_hash->{$volid};
if ($referenced->{$volid} || ($path && $referencedpath->{$path})) {
print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
$changes = 1;
my $cfg = PVE::Storage::config();
- # FIXME: Remove once our RBD plugin can handle CT and VM on a single storage
- # see: https://pve.proxmox.com/pipermail/pve-devel/2018-July/032900.html
- foreach my $stor (keys %{$cfg->{ids}}) {
- delete($cfg->{ids}->{$stor}) if ! $cfg->{ids}->{$stor}->{content}->{images};
- }
-
print "rescan volumes...\n";
my $volid_hash = scan_volids($cfg, $vmid);
my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- my $server = $scfg->{server};
- my $datastore = $scfg->{datastore};
- my $username = $scfg->{username} // 'root@pam';
my $fingerprint = $scfg->{fingerprint};
+ my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);
- my $repo = "$username\@$server:$datastore";
+ my $repo = PVE::PBSClient::get_repository($scfg);
+ my $namespace = $scfg->{namespace};
+
+ # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
local $ENV{PBS_PASSWORD} = $password;
local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);
mkpath $tmpdir;
my $conffile = PVE::QemuConfig->config_file($vmid);
- my $tmpfn = "$conffile.$$.tmp";
# disable interrupts (always do cleanups)
local $SIG{INT} =
local $SIG{TERM} =
# Note: $oldconf is undef if VM does not exists
my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
+ my $new_conf_raw = '';
my $rpcenv = PVE::RPCEnvironment::get();
- my $devinfo = {};
+ my $devinfo = {}; # info about drives included in backup
+ my $virtdev_hash = {}; # info about allocated drives
eval {
# enable interrupts
my $index = PVE::Tools::file_get_contents($index_fn);
$index = decode_json($index);
- # print Dumper($index);
foreach my $info (@{$index->{files}}) {
if ($info->{filename} =~ m/^(drive-\S+).img.fidx$/) {
my $devname = $1;
}
}
- my $is_qemu_server_backup = scalar(grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}});
+ my $is_qemu_server_backup = scalar(
+ grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
+ );
if (!$is_qemu_server_backup) {
die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
}
}
my $fh = IO::File->new($cfgfn, "r") ||
- "unable to read qemu-server.conf - $!\n";
+ die "unable to read qemu-server.conf - $!\n";
- my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);
+ $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);
# fixme: rate limit?
my $d = $virtdev_hash->{$virtdev};
next if $d->{is_cloudinit}; # no need to restore cloudinit
+ # this fails if storage is unavailable
my $volid = $d->{volid};
-
my $path = PVE::Storage::path($storecfg, $volid);
+ # for live-restore we only want to preload the efidisk and TPM state
+ next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
+
+ my @ns_arg;
+ if (defined(my $ns = $scfg->{namespace})) {
+ @ns_arg = ('--ns', $ns);
+ }
+
my $pbs_restore_cmd = [
'/usr/bin/pbs-restore',
'--repository', $repo,
+ @ns_arg,
$pbs_backup_name,
"$d->{devname}.img.fidx",
$path,
];
push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
+ push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
push @$pbs_restore_cmd, '--skip-zero';
$fh->seek(0, 0) || die "seek failed - $!\n";
- my $outfd = new IO::File ($tmpfn, "w") ||
- die "unable to write config for VM $vmid\n";
-
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $options->{unique});
+ $new_conf_raw .= restore_update_config_line(
+ $cookie,
+ $map,
+ $line,
+ $options->{unique},
+ );
}
$fh->close();
- $outfd->close();
};
my $err = $@;
- $restore_deactivate_volumes->($storecfg, $devinfo);
+ if ($err || !$options->{live}) {
+ $restore_deactivate_volumes->($storecfg, $virtdev_hash);
+ }
rmtree $tmpdir;
if ($err) {
- unlink $tmpfn;
- $restore_destroy_volumes->($storecfg, $devinfo);
+ $restore_destroy_volumes->($storecfg, $virtdev_hash);
die $err;
}
- rename($tmpfn, $conffile) ||
- die "unable to commit configuration file '$conffile'\n";
+ if ($options->{live}) {
+ # keep lock during live-restore
+ $new_conf_raw .= "\nlock: create";
+ }
- PVE::Cluster::cfs_update(); # make sure we read new file
+ my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
+ check_restore_permissions($rpcenv, $user, $new_conf);
+ PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
warn $@ if $@;
+
+ PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};
+
+ if ($options->{live}) {
+ # enable interrupts
+ local $SIG{INT} =
+ local $SIG{TERM} =
+ local $SIG{QUIT} =
+ local $SIG{HUP} =
+ local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };
+
+ my $conf = PVE::QemuConfig->load_config($vmid);
+ die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
+
+ # these special drives are already restored before start
+ delete $devinfo->{'drive-efidisk0'};
+ delete $devinfo->{'drive-tpmstate0-backup'};
+
+ my $pbs_opts = {
+ repo => $repo,
+ keyfile => $keyfile,
+ snapshot => $pbs_backup_name,
+ namespace => $namespace,
+ };
+ pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);
+
+ PVE::QemuConfig->remove_lock($vmid, "create");
+ }
+}
+
+# Live-restore a VM from a Proxmox Backup Server snapshot.
+#
+# The VM is started paused with every disk in $restored_disks backed by its PBS archive.
+# One 'block-stream' job per disk then copies the data into the target volume while the
+# guest is resumed. Once all jobs are done, the PBS block nodes are removed again.
+#
+# Parameters:
+#   $vmid, $conf, $storecfg - VM ID, its config and the storage config
+#   $restored_disks - hash keyed by backup drive name ('drive-<config key>'); only the
+#                     keys are used here
+#   $opts - hash with 'repo', 'snapshot', 'keyfile' (used only if the file exists) and
+#           optionally 'namespace'
+#
+# Dies early on a drive name that does not start with 'drive-'. On any error after that,
+# the VM is stopped and the sub dies with "live-restore failed".
+sub pbs_live_restore {
+    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;
+
+    print "starting VM for live-restore\n";
+    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";
+
+    my $live_restore_backing = {};
+    for my $ds (keys %$restored_disks) {
+        # capture explicitly: $1 would keep the value of an earlier successful match if
+        # this one failed, silently mapping the disk to the wrong config key
+        my ($confname) = ($ds =~ m/^drive-(.*)$/);
+        die "unexpected drive name '$ds' for live-restore\n" if !defined($confname);
+
+        my $pbs_conf = {
+            repository => $opts->{repo},
+            snapshot => $opts->{snapshot},
+            archive => "$ds.img.fidx",
+        };
+        $pbs_conf->{keyfile} = $opts->{keyfile}
+            if defined($opts->{keyfile}) && -e $opts->{keyfile};
+        $pbs_conf->{namespace} = $opts->{namespace} if defined($opts->{namespace});
+
+        my $drive = parse_drive($confname, $conf->{$confname});
+        print "restoring '$ds' to '$drive->{file}'\n";
+
+        my $pbs_name = "drive-${confname}-pbs";
+        $live_restore_backing->{$confname} = {
+            name => $pbs_name,
+            blockdev => print_pbs_blockdev($pbs_conf, $pbs_name),
+        };
+    }
+
+    eval {
+        # make sure HA doesn't interrupt our restore by stopping the VM
+        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
+            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
+        }
+
+        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
+        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
+        my $start_params = {
+            paused => 1,
+            'live-restore-backing' => $live_restore_backing,
+        };
+        vm_start_nolock($storecfg, $vmid, $conf, $start_params, {});
+
+        my $qmeventd_fd = register_qmeventd_handle($vmid);
+
+        # begin streaming, i.e. data copy from PBS to target disk for every vol,
+        # this will effectively collapse the backing image chain consisting of
+        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
+        # removes itself once all backing images vanish with 'auto-remove=on')
+        my $jobs = {};
+        for my $ds (sort keys %$restored_disks) {
+            my $job_id = "restore-$ds";
+            mon_cmd($vmid, 'block-stream',
+                'job-id' => $job_id,
+                device => "$ds",
+            );
+            $jobs->{$job_id} = {};
+        }
+
+        mon_cmd($vmid, 'cont');
+        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');
+
+        print "restore-drive jobs finished successfully, removing all tracking block devices"
+            ." to disconnect from Proxmox Backup Server\n";
+
+        # node names match the "drive-<config key>-pbs" names registered above
+        for my $ds (sort keys %$restored_disks) {
+            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
+        }
+
+        close($qmeventd_fd);
+    };
+
+    if (my $err = $@) {
+        warn "An error occurred during live-restore: $err\n";
+        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
+        die "live-restore failed\n";
+    }
+}
+
+ # Inspired by pbs live-restore, this restores with the disks being available as files.
+ # Theoretically this can also be used to quick-start a full-clone vm if the
+ # disks are all available as files.
+ #
+ # The mapping should provide a format and a path by config entry, such as
+ # `{ scsi0 => { format => <qcow2|raw|vmdk>, path => "/path/to/file" }, sata1 => ... }`
+ #
+ # This is used when doing a `create` call with the `--live-import` parameter,
+ # where the disks get an `import-from=` property. The non-live part is
+ # therefore already handled in the `$create_disks()` call happening in the
+ # `create` api call
+ #
+ # Dies on an invalid mapping before the VM is touched. If the import itself
+ # fails, the VM is stopped and "live-restore failed" is raised; the "import"
+ # lock is only removed after a successful import.
+ sub live_import_from_files {
+     my ($mapping, $vmid, $conf, $restore_options) = @_;
+
+     my $live_restore_backing = {};
+     for my $dev (keys %$mapping) {
+         die "disk not supported for live-restoring: '$dev'\n"
+             if !is_valid_drivename($dev) || $dev =~ /^(?:efidisk|tpmstate)/;
+
+         die "mapping contains disk '$dev' which does not exist in the config\n"
+             if !exists($conf->{$dev});
+
+         my $info = $mapping->{$dev};
+         my ($format, $path) = $info->@{qw(format path)};
+         die "missing path for '$dev' mapping\n" if !$path;
+         die "missing format for '$dev' mapping\n" if !$format;
+         die "invalid format '$format' for '$dev' mapping\n"
+             if !grep { $format eq $_ } qw(raw qcow2 vmdk);
+
+         # a literal comma has to be doubled inside a QEMU option string, else the
+         # remainder of the path would be parsed as further options
+         my $escaped_path = $path =~ s/,/,,/gr;
+
+         $live_restore_backing->{$dev} = {
+             name => "drive-$dev-restore",
+             blockdev => "driver=$format,node-name=drive-$dev-restore"
+                 . ",read-only=on"
+                 . ",file.driver=file,file.filename=$escaped_path"
+         };
+     }
+
+     my $storecfg = PVE::Storage::config();
+     eval {
+
+         # make sure HA doesn't interrupt our restore by stopping the VM
+         if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
+             run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
+         }
+
+         vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'live-restore-backing' => $live_restore_backing}, {});
+
+         # prevent shutdowns from qmeventd when the VM powers off from the inside
+         my $qmeventd_fd = register_qmeventd_handle($vmid);
+
+         # begin streaming, i.e. data copy from the source file to the target disk
+         # for every mapped drive; each job is expected to disappear on its own
+         # once finished ('auto' completion below)
+         my $jobs = {};
+         for my $ds (sort keys %$live_restore_backing) {
+             my $job_id = "restore-$ds";
+             mon_cmd($vmid, 'block-stream',
+                 'job-id' => $job_id,
+                 device => "drive-$ds",
+             );
+             $jobs->{$job_id} = {};
+         }
+
+         mon_cmd($vmid, 'cont');
+         qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');
+
+         print "restore-drive jobs finished successfully, removing all tracking block devices\n";
+
+         for my $ds (sort keys %$live_restore_backing) {
+             mon_cmd($vmid, 'blockdev-del', 'node-name' => "drive-$ds-restore");
+         }
+
+         close($qmeventd_fd);
+     };
+
+     my $err = $@;
+
+     if ($err) {
+         warn "An error occurred during live-restore: $err\n";
+         _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
+         die "live-restore failed\n";
+     }
+
+     PVE::QemuConfig->remove_lock($vmid, "import");
}
sub restore_vma_archive {
my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
POSIX::mkfifo($mapfifo, 0600);
my $fifofh;
-
- my $openfifo = sub {
- open($fifofh, '>', $mapfifo) || die $!;
- };
+ my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };
$add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);
- my $oldtimeout;
- my $timeout = 5;
-
- my $devinfo = {};
+ my $devinfo = {}; # info about drives included in backup
+ my $virtdev_hash = {}; # info about allocated drives
my $rpcenv = PVE::RPCEnvironment::get();
my $conffile = PVE::QemuConfig->config_file($vmid);
- my $tmpfn = "$conffile.$$.tmp";
# Note: $oldconf is undef if VM does not exist
my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
+ my $new_conf_raw = '';
my %storage_limits;
# we can read the config - that is already extracted
my $fh = IO::File->new($cfgfn, "r") ||
- "unable to read qemu-server.conf - $!\n";
+ die "unable to read qemu-server.conf - $!\n";
my $fwcfgfn = "$tmpdir/qemu-server.fw";
if (-f $fwcfgfn) {
PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
}
- my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
+ $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
- foreach my $key (keys %storage_limits) {
- my $limit = PVE::Storage::get_bandwidth_limit('restore', [$key], $bwlimit);
- next if !$limit;
- print STDERR "rate limit for storage $key: $limit KiB/s\n";
- $storage_limits{$key} = $limit * 1024;
+ foreach my $info (values %{$virtdev_hash}) {
+ my $storeid = $info->{storeid};
+ next if defined($storage_limits{$storeid});
+
+ my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
+ print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
+ $storage_limits{$storeid} = $limit * 1024;
}
foreach my $devname (keys %$devinfo) {
$fh->seek(0, 0) || die "seek failed - $!\n";
- my $outfd = new IO::File ($tmpfn, "w") ||
- die "unable to write config for VM $vmid\n";
-
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
+ $new_conf_raw .= restore_update_config_line(
+ $cookie,
+ $map,
+ $line,
+ $opts->{unique},
+ );
}
$fh->close();
- $outfd->close();
};
+ my $oldtimeout;
+
eval {
# enable interrupts
local $SIG{INT} =
local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
local $SIG{ALRM} = sub { die "got timeout\n"; };
- $oldtimeout = alarm($timeout);
+ $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one
my $parser = sub {
my $line = shift;
$devinfo->{$devname} = { size => $size, dev_id => $dev_id };
} elsif ($line =~ m/^CTIME: /) {
# we correctly received the vma config, so we can disable
- # the timeout now for disk allocation (set to 10 minutes, so
- # that we always timeout if something goes wrong)
- alarm(600);
+ # the timeout now for disk allocation
+ alarm($oldtimeout || 0);
+ $oldtimeout = undef;
&$print_devmap();
print $fifofh "done\n";
- my $tmp = $oldtimeout || 0;
- $oldtimeout = undef;
- alarm($tmp);
close($fifofh);
+ $fifofh = undef;
}
};
alarm($oldtimeout) if $oldtimeout;
- $restore_deactivate_volumes->($cfg, $devinfo);
+ $restore_deactivate_volumes->($cfg, $virtdev_hash);
+ close($fifofh) if $fifofh;
unlink $mapfifo;
rmtree $tmpdir;
if ($err) {
- unlink $tmpfn;
- $restore_destroy_volumes->($cfg, $devinfo);
+ $restore_destroy_volumes->($cfg, $virtdev_hash);
die $err;
}
- rename($tmpfn, $conffile) ||
- die "unable to commit configuration file '$conffile'\n";
-
- PVE::Cluster::cfs_update(); # make sure we read new file
+ my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
+ check_restore_permissions($rpcenv, $user, $new_conf);
+ PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
warn $@ if $@;
+
+ PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
sub restore_tar_archive {
my ($archive, $vmid, $user, $opts) = @_;
+ if (scalar(keys $opts->{override_conf}->%*) > 0) {
+ my $keystring = join(' ', keys $opts->{override_conf}->%*);
+ die "cannot pass along options ($keystring) when restoring from tar archive\n";
+ }
+
if ($archive ne '-') {
my $firstfile = tar_archive_read_firstfile($archive);
- die "ERROR: file '$archive' dos not lock like a QemuServer vzdump backup\n"
+ die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
if $firstfile ne 'qemu-server.conf';
}
local $ENV{VZDUMP_USER} = $user;
my $conffile = PVE::QemuConfig->config_file($vmid);
- my $tmpfn = "$conffile.$$.tmp";
+ my $new_conf_raw = '';
# disable interrupts (always do cleanups)
local $SIG{INT} =
my $confsrc = "$tmpdir/qemu-server.conf";
- my $srcfd = new IO::File($confsrc, "r") ||
- die "unable to open file '$confsrc'\n";
-
- my $outfd = new IO::File ($tmpfn, "w") ||
- die "unable to write config for VM $vmid\n";
+ my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";
my $cookie = { netcount => 0 };
while (defined (my $line = <$srcfd>)) {
- $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
+ $new_conf_raw .= restore_update_config_line(
+ $cookie,
+ $map,
+ $line,
+ $opts->{unique},
+ );
}
$srcfd->close();
- $outfd->close();
};
if (my $err = $@) {
- unlink $tmpfn;
tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
die $err;
}
rmtree $tmpdir;
- rename $tmpfn, $conffile ||
- die "unable to commit configuration file '$conffile'\n";
+ PVE::Tools::file_set_contents($conffile, $new_conf_raw);
PVE::Cluster::cfs_update(); # make sure we read new file
rbd => 1,
};
sub do_snapshots_with_qemu {
- my ($storecfg, $volid) = @_;
+ my ($storecfg, $volid, $deviceid) = @_;
+
+ return if $deviceid =~ m/tpmstate0/;
my $storage_name = PVE::Storage::parse_volume_id($volid);
my $scfg = $storecfg->{ids}->{$storage_name};
+ die "could not find storage '$storage_name'\n" if !defined($scfg);
if ($qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd}){
return 1;
return 1;
}
- return undef;
+ return;
}
sub qga_check_running {
eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
if ($@) {
- warn "Qemu Guest Agent is not running - $@" if !$nowarn;
+ warn "QEMU Guest Agent is not running - $@" if !$nowarn;
return 0;
}
return 1;
}
sub qemu_img_convert {
- my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;
+ my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;
my $storecfg = PVE::Storage::config();
my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
$src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
$src_is_iscsi = ($src_path =~ m|^iscsi://|);
$cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
- } elsif (-f $src_volid) {
+ } elsif (-f $src_volid || -b $src_volid) {
$src_path = $src_volid;
if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
$src_format = $1;
if $snapname && $src_format && $src_format eq "qcow2";
push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
push @$cmd, '-T', $cachemode if defined($cachemode);
+ push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);
if ($src_is_iscsi) {
push @$cmd, '--image-opts';
if($line =~ m/\((\S+)\/100\%\)/){
my $percent = $1;
my $transferred = int($size * $percent / 100);
- my $remaining = $size - $transferred;
+ my $total_h = render_bytes($size, 1);
+ my $transferred_h = render_bytes($transferred, 1);
- print "transferred: $transferred bytes remaining: $remaining bytes total: $size bytes progression: $percent %\n";
+ print "transferred $transferred_h of $total_h ($percent%)\n";
}
};
sub qemu_img_format {
my ($scfg, $volname) = @_;
- if ($scfg->{path} && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
+ # FIXME: this entire function is kind of weird given that `parse_volname`
+ # also already gives us a format?
+ my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi';
+
+ if ($is_path_storage && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
return $1;
} else {
return "raw";
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
+# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
sub qemu_drive_mirror_monitor {
- my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
+ my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;
$completion //= 'complete';
+ $op //= "mirror";
eval {
my $err_complete = 0;
+ my $starttime = time ();
while (1) {
- die "storage migration timed out\n" if $err_complete > 300;
+ die "block job ('$op') timed out\n" if $err_complete > 300;
my $stats = mon_cmd($vmid, "query-block-jobs");
+ my $ctime = time();
- my $running_mirror_jobs = {};
- foreach my $stat (@$stats) {
- next if $stat->{type} ne 'mirror';
- $running_mirror_jobs->{$stat->{device}} = $stat;
+ my $running_jobs = {};
+ for my $stat (@$stats) {
+ next if $stat->{type} ne $op;
+ $running_jobs->{$stat->{device}} = $stat;
}
my $readycounter = 0;
- foreach my $job (keys %$jobs) {
+ for my $job_id (sort keys %$jobs) {
+ my $job = $running_jobs->{$job_id};
- if(defined($jobs->{$job}->{complete}) && !defined($running_mirror_jobs->{$job})) {
- print "$job : finished\n";
- delete $jobs->{$job};
+ my $vanished = !defined($job);
+ my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
+ if($complete || ($vanished && $completion eq 'auto')) {
+ print "$job_id: $op-job finished\n";
+ delete $jobs->{$job_id};
next;
}
- die "$job: mirroring has been cancelled\n" if !defined($running_mirror_jobs->{$job});
+ die "$job_id: '$op' has been cancelled\n" if !defined($job);
- my $busy = $running_mirror_jobs->{$job}->{busy};
- my $ready = $running_mirror_jobs->{$job}->{ready};
- if (my $total = $running_mirror_jobs->{$job}->{len}) {
- my $transferred = $running_mirror_jobs->{$job}->{offset} || 0;
+ my $busy = $job->{busy};
+ my $ready = $job->{ready};
+ if (my $total = $job->{len}) {
+ my $transferred = $job->{offset} || 0;
my $remaining = $total - $transferred;
my $percent = sprintf "%.2f", ($transferred * 100 / $total);
- print "$job: transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
+ my $duration = $ctime - $starttime;
+ my $total_h = render_bytes($total, 1);
+ my $transferred_h = render_bytes($transferred, 1);
+
+ my $status = sprintf(
+ "transferred $transferred_h of $total_h ($percent%%) in %s",
+ render_duration($duration),
+ );
+
+ if ($ready) {
+ if ($busy) {
+ $status .= ", still busy"; # shouldn't even happen? but mirror is weird
+ } else {
+ $status .= ", ready";
+ }
+ }
+ print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
+ $jobs->{$job_id}->{ready} = $ready;
}
- $readycounter++ if $running_mirror_jobs->{$job}->{ready};
+ $readycounter++ if $job->{ready};
}
last if scalar(keys %$jobs) == 0;
if ($readycounter == scalar(keys %$jobs)) {
- print "all mirroring jobs are ready \n";
- last if $completion eq 'skip'; #do the complete later
+ print "all '$op' jobs are ready\n";
+
+ # do the complete later (or has already been done)
+ last if $completion eq 'skip' || $completion eq 'auto';
if ($vmiddst && $vmiddst != $vmid) {
my $agent_running = $qga && qga_check_running($vmid);
if ($agent_running) {
print "freeze filesystem\n";
eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
+ warn $@ if $@;
} else {
print "suspend vm\n";
eval { PVE::QemuServer::vm_suspend($vmid, 1); };
+ warn $@ if $@;
}
# if we clone a disk for a new target vm, we don't switch the disk
if ($agent_running) {
print "unfreeze filesystem\n";
eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
+ warn $@ if $@;
} else {
print "resume vm\n";
- eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
+ eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
+ warn $@ if $@;
}
last;
} else {
- foreach my $job (keys %$jobs) {
+ for my $job_id (sort keys %$jobs) {
# try to switch the disk if source and destination are on the same guest
- print "$job: Completing block job...\n";
+ print "$job_id: Completing block job_id...\n";
my $op;
if ($completion eq 'complete') {
} else {
die "invalid completion value: $completion\n";
}
- eval { mon_cmd($vmid, $op, device => $job) };
+ eval { mon_cmd($vmid, $op, device => $job_id) };
if ($@ =~ m/cannot be completed/) {
- print "$job: Block job cannot be completed, try again.\n";
+ print "$job_id: block job cannot be completed, trying again.\n";
$err_complete++;
}else {
- print "$job: Completed successfully.\n";
- $jobs->{$job}->{complete} = 1;
+ print "$job_id: Completed successfully.\n";
+ $jobs->{$job_id}->{complete} = 1;
}
}
}
if ($err) {
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
- die "mirroring error: $err";
+ die "block job ($op) error: $err";
}
-
}
sub qemu_blockjobs_cancel {
}
}
+ # Guard against bug #4525: drive-mirror opens the target drive with the aio
+ # setting of the source, and some storages misbehave (or even crash) with io_uring.
+ #
+ # Does nothing unless drive-mirror is used and the storage actually changes.
+ # Dies if the source drive effectively uses io_uring while the target storage
+ # does not allow io_uring as default.
+ my sub clone_disk_check_io_uring {
+     my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;
+
+     # Only drive-mirror is affected. When the storage stays the same we assume
+     # that whatever works for the source also works for the target.
+     return if !$use_drive_mirror || $src_storeid eq $dst_storeid;
+
+     my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
+     my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
+
+     my $cache_direct = drive_uses_cache_direct($src_drive);
+
+     # an explicit aio setting on the drive wins over the storage default
+     my $src_uses_io_uring = $src_drive->{aio}
+         ? $src_drive->{aio} eq 'io_uring'
+         : storage_allows_io_uring_default($src_scfg, $cache_direct);
+
+     die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
+         if $src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct);
+ }
+
sub clone_disk {
- my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
- $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;
+ my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;
+
+ my ($vmid, $running) = $source->@{qw(vmid running)};
+ my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};
+
+ my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
+ my ($storage, $format) = $dest->@{qw(storage format)};
+
+ my $unused = $src_drivename =~ /^unused/;
+ my $use_drive_mirror = $full && $running && $src_drivename && !$snapname && !$unused;
+
+ if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
+ die "cloning from/to EFI disk requires EFI disk\n"
+ if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
+ die "cloning from/to TPM state requires TPM state\n"
+ if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';
+
+ # This would lead to two device nodes in QEMU pointing to the same backing image!
+ die "cannot change drive name when cloning disk from/to the same VM\n"
+ if $use_drive_mirror && $vmid == $newvmid;
+ }
+
+ die "cannot move TPM state while VM is running\n"
+ if $use_drive_mirror && $src_drivename eq 'tpmstate0';
my $newvolid;
+ print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
+ print "$src_drivename " if $src_drivename;
+ print "($drive->{file})\n";
+
if (!$full) {
- print "create linked clone of drive $drivename ($drive->{file})\n";
$newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
push @$newvollist, $newvolid;
} else {
-
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
- $storeid = $storage if $storage;
+ my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
+ my $storeid = $storage || $src_storeid;
my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);
- my ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 3);
- print "create full clone of drive $drivename ($drive->{file})\n";
my $name = undef;
+ my $size = undef;
if (drive_is_cloudinit($drive)) {
$name = "vm-$newvmid-cloudinit";
- $name .= ".$dst_format" if $dst_format ne 'raw';
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ if ($scfg->{path}) {
+ $name .= ".$dst_format";
+ }
$snapname = undef;
$size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
- } elsif ($drivename eq 'efidisk0') {
- $size = get_efivars_size($conf);
+ } elsif ($dst_drivename eq 'efidisk0') {
+ $size = $efisize or die "internal error - need to specify EFI disk size\n";
+ } elsif ($dst_drivename eq 'tpmstate0') {
+ $dst_format = 'raw';
+ $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
+ } else {
+ clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);
+
+ $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
}
- $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024));
+ $newvolid = PVE::Storage::vdisk_alloc(
+ $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
+ );
push @$newvollist, $newvolid;
PVE::Storage::activate_volumes($storecfg, [$newvolid]);
if (drive_is_cloudinit($drive)) {
+ # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
+ # if this is the case, we have to complete any block-jobs still there from
+ # previous drive-mirrors
+ if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
+ qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
+ }
goto no_data_clone;
}
my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
- if (!$running || $snapname) {
- # TODO: handle bwlimits
- if ($drivename eq 'efidisk0') {
+ if ($use_drive_mirror) {
+ qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
+ $completion, $qga, $bwlimit);
+ } else {
+ if ($dst_drivename eq 'efidisk0') {
# the relevant data on the efidisk may be smaller than the source
# e.g. on RBD/ZFS, so we use dd to copy only the amount
# that is given by the OVMF_VARS.fd
- my $src_path = PVE::Storage::path($storecfg, $drive->{file});
+ my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
my $dst_path = PVE::Storage::path($storecfg, $newvolid);
- run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=1", "count=$size", "if=$src_path", "of=$dst_path"]);
- } else {
- qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
- }
- } else {
- my $kvmver = get_running_qemu_version ($vmid);
- if (!min_version($kvmver, 2, 7)) {
- die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
- if $drive->{iothread};
- }
+ my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];
- qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs, $completion, $qga, $bwlimit);
+ # better for Ceph if block size is not too small, see bug #3324
+ my $bs = 1024*1024;
+
+ my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];
+
+ if ($src_format eq 'qcow2' && $snapname) {
+ die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
+ if !min_version(kvm_user_version(), 6, 2);
+ push $cmd->@*, '-l', $snapname;
+ }
+ push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
+ run_command($cmd);
+ } else {
+ qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
+ }
}
}
no_data_clone:
- my ($size) = PVE::Storage::volume_size_info($storecfg, $newvolid, 3);
+ my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };
- my $disk = $drive;
- $disk->{format} = undef;
+ my $disk = dclone($drive);
+ delete $disk->{format};
$disk->{file} = $newvolid;
- $disk->{size} = $size;
+ $disk->{size} = $size if defined($size) && !$unused;
return $disk;
}
$machine_type = $1;
$use_old_bios_files = 1;
} else {
- my $version = PVE::QemuServer::Machine::extract_version($machine_type, kvm_user_version());
+ my $version = extract_version($machine_type, kvm_user_version());
# Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
# load new efi bios files on migration. So this hack is required to allow
# live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
}
sub get_efivars_size {
- my ($conf) = @_;
+ my ($conf, $efidisk) = @_;
+
my $arch = get_vm_arch($conf);
- my (undef, $ovmf_vars) = get_ovmf_files($arch);
- die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
+ $efidisk //= $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
+ my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
+ my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
return -s $ovmf_vars;
}
return;
}
-sub create_efidisk($$$$$) {
- my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_;
+ # Rewrite the 'tpmstate0' entry of $conf in place so that its size property
+ # carries the fixed TPM state disk size.
+ sub update_tpmstate_size {
+     my ($conf) = @_;
+
+     my $tpm_drive = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
+     $tpm_drive->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
+
+     $conf->{tpmstate0} = print_drive($tpm_drive);
+ }
+
+sub create_efidisk($$$$$$$) {
+ my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;
- my (undef, $ovmf_vars) = get_ovmf_files($arch);
- die "EFI vars default image not found\n" if ! -f $ovmf_vars;
+ my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
my $vars_size_b = -s $ovmf_vars;
my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');
PVE::Storage::activate_volumes($storecfg, [$volid]);
qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);
- my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);
+ my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);
return ($volid, $size/1024);
}
}
my $controller = int($drive->{index} / $maxdev);
- my $controller_prefix = ($conf->{scsihw} && $conf->{scsihw} eq 'virtio-scsi-single') ? "virtioscsi" : "scsihw";
+ my $controller_prefix = ($conf->{scsihw} && $conf->{scsihw} eq 'virtio-scsi-single')
+ ? "virtioscsi"
+ : "scsihw";
return ($maxdev, $controller, $controller_prefix);
}
-sub windows_version {
- my ($ostype) = @_;
-
- return 0 if !$ostype;
-
- my $winversion = 0;
-
- if($ostype eq 'wxp' || $ostype eq 'w2k3' || $ostype eq 'w2k') {
- $winversion = 5;
- } elsif($ostype eq 'w2k8' || $ostype eq 'wvista') {
- $winversion = 6;
- } elsif ($ostype =~ m/^win(\d+)$/) {
- $winversion = $1;
- }
-
- return $winversion;
-}
-
sub resolve_dst_disk_format {
my ($storecfg, $storeid, $src_volname, $format) = @_;
my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
sub nbd_stop {
my ($vmid) = @_;
- mon_cmd($vmid, 'nbd-server-stop');
+ mon_cmd($vmid, 'nbd-server-stop', timeout => 25);
}
sub create_reboot_request {
return $res;
}
+ # Translate a legacy boot string (one letter per device class) into a hash
+ # mapping device names to boot indices.
+ #
+ # The n-th letter of the string starts counting at n*100. 'd' applies to CD-ROM
+ # drives, 'c' to all other volumes (but only the configured 'bootdisk' gets an
+ # entry) and 'n' to network devices. Without $bootcfg->{legacy} the default of
+ # the boot format is used.
+ sub bootorder_from_legacy {
+     my ($conf, $bootcfg) = @_;
+
+     my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};
+
+     # next free boot index per letter
+     my $next_index = {};
+     my $position = 0;
+     for my $letter (split(//, $legacy)) {
+         $position++;
+         $next_index->{$letter} = $position * 100;
+     }
+
+     my $bootorder = {};
+
+     PVE::QemuConfig->foreach_volume($conf, sub {
+         my ($ds, $drive) = @_;
+
+         if (drive_is_cdrom($drive, 1)) {
+             if ($next_index->{d}) {
+                 $bootorder->{$ds} = $next_index->{d};
+                 $next_index->{d}++;
+             }
+         } elsif ($next_index->{c}) {
+             my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
+             $bootorder->{$ds} = $next_index->{c} if $is_bootdisk;
+             # the counter advances for every non-CD-ROM volume, bootdisk or not
+             $next_index->{c}++;
+         }
+     });
+
+     if ($next_index->{n}) {
+         for my $id (0 .. $MAX_NETS - 1) {
+             my $netname = "net$id";
+             next if !$conf->{$netname};
+             $bootorder->{$netname} = $next_index->{n};
+             $next_index->{n}++;
+         }
+     }
+
+     return $bootorder;
+ }
+
+ # Build the default device list for the 'boot: order=' property: first disk,
+ # first CD-ROM and first configured network device, in that order. This mirrors
+ # the legacy default boot order with explicit device names, which matters
+ # because the fallback used when neither 'order' nor the old format is given
+ # goes through 'bootorder_from_legacy', and the two should not diverge.
+ #
+ # Returns an array reference of device names.
+ sub get_default_bootdevices {
+     my ($conf) = @_;
+
+     my @devices = ();
+
+     # harddisk first (0), then cdrom (1)
+     for my $want_cdrom (0, 1) {
+         my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
+         push @devices, $disk if $disk;
+     }
+
+     # network: only the lowest-numbered configured NIC
+     my $netname = first { $conf->{$_} } map { "net$_" } (0 .. $MAX_NETS - 1);
+     push @devices, $netname if defined($netname);
+
+     return \@devices;
+ }
+
+ # Compute the boot index of each device (device name => index) for $conf.
+ #
+ # Without a 'boot' property, or with a legacy-style one, the work is delegated
+ # to bootorder_from_legacy(). With an 'order' list the devices are numbered
+ # consecutively in list order. Otherwise an empty hash is returned.
+ sub device_bootorder {
+     my ($conf) = @_;
+
+     return bootorder_from_legacy($conf) if !defined($conf->{boot});
+
+     my $boot = parse_property_string($boot_fmt, $conf->{boot});
+
+     return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};
+
+     my $bootorder = {};
+     return $bootorder if !$boot->{order};
+
+     # start at 100 to allow user to insert devices before us with -args
+     my $index = 100;
+     $bootorder->{$_} = $index++ for PVE::Tools::split_list($boot->{order});
+
+     return $bootorder;
+ }
+
+ # Connect to the qmeventd socket and send a 'vzdump' handshake for $vmid.
+ #
+ # The connection is attempted up to five times, 25ms apart, as long as it fails
+ # with EINTR or EAGAIN. Any other error, or running out of attempts, dies.
+ #
+ # Returns the connected socket; the caller closes it once the inhibit is no
+ # longer required.
+ sub register_qmeventd_handle {
+     my ($vmid) = @_;
+
+     my $socket_path = "/var/run/qmeventd.sock";
+     my $attempts = 0;
+     my $handle;
+
+     while (1) {
+         $attempts += 1;
+         $handle = IO::Socket::UNIX->new(Peer => $socket_path, Blocking => 0, Timeout => 1);
+         last if $handle;
+
+         # $! must be inspected right after the failed connect
+         die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
+             if $! != EINTR && $! != EAGAIN;
+
+         die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
+             . "after $attempts retries\n"
+             if $attempts > 4;
+
+         usleep(25000);
+     }
+
+     # send handshake to mark VM as backing up
+     my $handshake = to_json({vzdump => {vmid => "$vmid"}});
+     print $handle $handshake;
+
+     return $handle;
+ }
+
# bash completion helper
sub complete_backup_archives {
return $res;
}
+ # Tell whether the VM reports the QMP status 'paused' or 'prelaunch' (or
+ # 'suspended' as well, if $include_suspended is set).
+ #
+ # A failure to query the status is only warned about and yields a false value.
+ sub vm_is_paused {
+     my ($vmid, $include_suspended) = @_;
+
+     my $qmpstatus = eval {
+         PVE::QemuConfig::assert_config_exists_on_node($vmid);
+         mon_cmd($vmid, "query-status");
+     };
+     warn "$@\n" if $@;
+
+     # no status available, hand back the false value as is
+     return $qmpstatus if !$qmpstatus;
+
+     my $status = $qmpstatus->{status};
+     return $status eq "paused"
+         || $status eq "prelaunch"
+         || ($include_suspended && $status eq "suspended");
+ }
+
+ # Assert that the storage holding volume $vol is configured for the volume's
+ # content type. Returns 1 on success and dies otherwise.
+ sub check_volume_storage_type {
+     my ($storecfg, $vol) = @_;
+
+     my ($storeid) = PVE::Storage::parse_volume_id($vol);
+     my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+     my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);
+
+     if (!$scfg->{content}->{$vtype}) {
+         die "storage '$storeid' does not support content-type '$vtype'\n";
+     }
+
+     return 1;
+ }
+
+ # Add the MAC address of every 'netN' entry in $conf to the forwarding DB for
+ # its tap interface ("tap<vmid>i<N>").
+ #
+ # Entries that cannot be parsed are skipped. Entries without a MAC or without a
+ # bridge are skipped too, with a warning where that looks like a problem.
+ sub add_nets_bridge_fdb {
+     my ($conf, $vmid) = @_;
+
+     for my $opt (keys %$conf) {
+         my ($index) = $opt =~ m/^net(\d+)$/ or next;
+         my $iface = "tap${vmid}i${index}";
+
+         # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
+         my $net = parse_net($conf->{$opt}, 1) or next;
+         my ($mac, $bridge) = $net->@{qw(macaddr bridge)};
+
+         if (!$mac) {
+             # only worth a warning when the bridge port is not learning by itself
+             my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
+             log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
+                 if !$learning;
+             next;
+         }
+
+         if (!$bridge) {
+             log_warn("Interface '$iface' not attached to any bridge.");
+             next;
+         }
+
+         if ($have_sdn) {
+             PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge);
+         } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
+             PVE::Network::add_bridge_fdb($iface, $mac);
+         }
+     }
+ }
+
+ # Remove the MAC address of every 'netN' entry in $conf from the forwarding DB
+ # for its tap interface ("tap<vmid>i<N>").
+ #
+ # Entries that cannot be parsed, or that have no MAC address or no bridge, are
+ # skipped silently.
+ sub del_nets_bridge_fdb {
+     my ($conf, $vmid) = @_;
+
+     for my $opt (keys %$conf) {
+         next if $opt !~ m/^net(\d+)$/;
+         my $iface = "tap${vmid}i$1";
+
+         my $net = parse_net($conf->{$opt}) or next;
+         my $mac = $net->{macaddr} or next;
+
+         # add_nets_bridge_fdb() never registers anything for a NIC without a
+         # bridge, so there is nothing to remove; skipping also avoids using an
+         # undefined bridge name in the sysfs path below
+         my $bridge = $net->{bridge} or next;
+
+         if ($have_sdn) {
+             PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge);
+         } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
+             PVE::Network::del_bridge_fdb($iface, $mac);
+         }
+     }
+ }
+
+ # For every 'netN' entry in $conf, ask the SDN vnet code for the next free CIDR
+ # on the NIC's bridge. Does nothing without SDN support; failures for a single
+ # NIC are only warned about.
+ sub create_ifaces_ipams_ips {
+     my ($conf, $vmid) = @_;
+
+     return if !$have_sdn;
+
+     for my $opt (keys %$conf) {
+         next if $opt !~ m/^net(\d+)$/;
+
+         my $net = PVE::QemuServer::parse_net($conf->{$opt});
+         eval {
+             PVE::Network::SDN::Vnets::add_next_free_cidr(
+                 $net->{bridge}, $conf->{name}, $net->{macaddr}, $vmid, undef, 1);
+         };
+         warn $@ if $@;
+     }
+ }
+
+ # For every 'netN' entry in $conf, have the SDN vnet code drop the IPs tied to
+ # the NIC's MAC address. Does nothing without SDN support; failures for a
+ # single NIC are only warned about.
+ sub delete_ifaces_ipams_ips {
+     my ($conf, $vmid) = @_;
+
+     return if !$have_sdn;
+
+     for my $opt (keys %$conf) {
+         next if $opt !~ m/^net(\d+)$/;
+
+         my $net = PVE::QemuServer::parse_net($conf->{$opt});
+         eval {
+             PVE::Network::SDN::Vnets::del_ips_from_mac(
+                 $net->{bridge}, $net->{macaddr}, $conf->{name});
+         };
+         warn $@ if $@;
+     }
+ }
+
1;