use MIME::Base64;
use POSIX;
use Storable qw(dclone);
-use Time::HiRes qw(gettimeofday);
+use Time::HiRes qw(gettimeofday usleep);
use URI::Escape;
use UUID;
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
-# Note about locking: we use flock on the config file protect
-# against concurent actions.
-# Aditionaly, we have a 'lock' setting in the config file. This
-# can be set to 'migrate', 'backup', 'snapshot' or 'rollback'. Most actions are not
-# allowed when such lock is set. But you can ignore this kind of
-# lock with the --skiplock flag.
+# Note about locking: we use flock on the config file to protect against concurrent actions.
+# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
+# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
+# But you can ignore this kind of lock with the --skiplock flag.
cfs_register_file('/qemu-server/',
\&parse_vm_config,
my $agent_fmt = {
enabled => {
- description => "Enable/disable Qemu GuestAgent.",
+ description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
type => 'boolean',
default => 0,
default_key => 1,
type => 'string',
enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
default_key => 1,
- description => "The file on the host to gather entropy from. In most"
- . " cases /dev/urandom should be preferred over /dev/random"
- . " to avoid entropy-starvation issues on the host. Using"
- . " urandom does *not* decrease security in any meaningful"
- . " way, as it's still seeded from real entropy, and the"
- . " bytes provided will most likely be mixed with real"
- . " entropy on the guest as well. /dev/hwrng can be used"
- . " to pass through a hardware RNG from the host.",
+ description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
+ ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
+ ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
+ ." still seeded from real entropy, and the bytes provided will most likely be mixed"
+ ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
+ ." a hardware RNG from the host.",
},
max_bytes => {
type => 'integer',
- description => "Maximum bytes of entropy injected into the guest every"
- . " 'period' milliseconds. Prefer a lower value when using"
- . " /dev/random as source. Use 0 to disable limiting"
- . " (potentially dangerous!).",
+ description => "Maximum bytes of entropy allowed to get injected into the guest every"
+ ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
+ ." `0` to disable limiting (potentially dangerous!).",
optional => 1,
- # default is 1 KiB/s, provides enough entropy to the guest to avoid
- # boot-starvation issues (e.g. systemd etc...) while allowing no chance
- # of overwhelming the host, provided we're reading from /dev/urandom
+ # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
+ # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
+ # reading from /dev/urandom
default => 1024,
},
period => {
type => 'integer',
- description => "Every 'period' milliseconds the entropy-injection quota"
- . " is reset, allowing the guest to retrieve another"
- . " 'max_bytes' of entropy.",
+ description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
+ ." the guest to retrieve another 'max_bytes' of entropy.",
optional => 1,
default => 1000,
},
hotplug => {
optional => 1,
type => 'string', format => 'pve-hotplug-features',
- description => "Selectively enable hotplug features. This is a comma separated list of hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable hotplug completely. Value '1' is an alias for the default 'network,disk,usb'.",
+ description => "Selectively enable hotplug features. This is a comma separated list of"
+ ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
+ ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
default => 'network,disk,usb',
},
reboot => {
optional => 1,
type => 'number',
description => "Limit of CPU usage.",
- verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
+ verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
+ ." total of '2' CPU time. Value '0' indicates no CPU limit.",
minimum => 0,
maximum => 128,
default => 0,
memory => {
optional => 1,
type => 'integer',
- description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
+ description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
+ ." you use the balloon device.",
minimum => 16,
default => 512,
},
shares => {
optional => 1,
type => 'integer',
- description => "Amount of memory shares for auto-ballooning. The larger the number is, the more memory this VM gets. Number is relative to weights of all other running VMs. Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
+ description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
+ ." more memory this VM gets. Number is relative to weights of all other running VMs."
+ ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
minimum => 0,
maximum => 50000,
default => 1000,
keyboard => {
optional => 1,
type => 'string',
- description => "Keybord layout for vnc server. Default is read from the '/etc/pve/datacenter.cfg' configuration file.".
- "It should not be necessary to set it.",
+ description => "Keyboard layout for VNC server. The default is read from the"
+ ." '/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
enum => PVE::Tools::kvmkeymaplist(),
default => undef,
},
ostype => {
optional => 1,
type => 'string',
- enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 l24 l26 solaris)],
+ enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
description => "Specify guest operating system.",
verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
+win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiania kernel
},
agent => {
optional => 1,
- description => "Enable/disable Qemu GuestAgent and its properties.",
+ description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
type => 'string',
format => $agent_fmt,
},
localtime => {
optional => 1,
type => 'boolean',
- description => "Set the real time clock to local time. This is enabled by default if ostype"
- ." indicates a Microsoft OS.",
+ description => "Set the real time clock (RTC) to local time. This is enabled by default if"
+ ." the `ostype` indicates a Microsoft Windows OS.",
},
freeze => {
optional => 1,
push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
run_command($emulator_cmd, outfunc => sub { print $1; });
+ my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
+ while (! -e $paths->{pid}) {
+ die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
+ usleep(50_000);
+ }
+
# return untainted PID of swtpm daemon so it can be killed on error
file_read_firstline($paths->{pid}) =~ m/(\d+)/;
return $1;
my $type = 'default';
if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
- $type = $efidisk->{'ms-keys'} ? "4m-ms" : "4m";
+ $type = $efidisk->{'pre-enrolled-keys'} ? "4m-ms" : "4m";
}
return $types->{$type}->@*;
};
my $err = $@;
- # force stop with 10 sec timeout and 'nocheck'
- # always stop, even if QMP failed
+ # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);
die $err if $err;
my $running = check_running($vmid);
- if ($running && do_snapshots_with_qemu($storecfg, $volid)){
+ if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
} else {
PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
});
}
- if ($running && do_snapshots_with_qemu($storecfg, $volid)){
+ if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
} else {
PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
push @$cmd, '-S';
}
- # host pci devices
+ my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
+
+ my $pci_devices = {}; # host pci devices
for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
- my $d = parse_hostpci($conf->{"hostpci$i"});
- next if !$d;
- my $pcidevices = $d->{pciid};
- foreach my $pcidevice (@$pcidevices) {
- my $pciid = $pcidevice->{id};
-
- my $info = PVE::SysFSTools::pci_device_info("$pciid");
- die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support();
- die "no pci device info for device '$pciid'\n" if !$info;
-
- if ($d->{mdev}) {
- my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i);
- PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $d->{mdev});
- } else {
- die "can't unbind/bind PCI group to VFIO '$pciid'\n"
- if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid);
- die "can't reset PCI device '$pciid'\n"
- if $info->{has_fl_reset} && !PVE::SysFSTools::pci_dev_reset($info);
+ my $dev = $conf->{"hostpci$i"} or next;
+ $pci_devices->{$i} = parse_hostpci($dev);
+ }
+
+ my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
+ # reserve all PCI IDs before actually doing anything with them
+ PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);
+
+ eval {
+ for my $id (sort keys %$pci_devices) {
+ my $d = $pci_devices->{$id};
+ for my $dev ($d->{pciid}->@*) {
+ PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
}
- }
+ }
+ };
+ if (my $err = $@) {
+ eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
+ warn $@ if $@;
+ die $err;
}
PVE::Storage::activate_volumes($storecfg, $vollist);
eval {
- run_command(['/bin/systemctl', 'stop', "$vmid.scope"],
- outfunc => sub {}, errfunc => sub {});
+ run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
};
# Issues with the above 'stop' not being fully completed are extremely rare, a very low
# timeout should be more than enough here...
my $cpuunits = get_cpuunits($conf);
- my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
my %run_params = (
timeout => $statefile ? undef : $start_timeout,
umask => 0077,
$run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
}
- my %properties = (
+ my %systemd_properties = (
Slice => 'qemu.slice',
KillMode => 'process',
SendSIGKILL => 0,
if (PVE::CGroup::cgroup_mode() == 2) {
$cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
- $properties{CPUWeight} = $cpuunits;
+ $systemd_properties{CPUWeight} = $cpuunits;
} else {
- $properties{CPUShares} = $cpuunits;
+ $systemd_properties{CPUShares} = $cpuunits;
}
if (my $cpulimit = $conf->{cpulimit}) {
- $properties{CPUQuota} = int($cpulimit * 100);
+ $systemd_properties{CPUQuota} = int($cpulimit * 100);
}
- $properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
+ $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick
my $run_qemu = sub {
PVE::Tools::run_fork sub {
- PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties);
+ PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
my $tpmpid;
if (my $tpm = $conf->{tpmstate0}) {
if (my $err = $@) {
# deactivate volumes if start fails
eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
+ eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
+
die "start failed: $err";
}
+ # re-reserve all PCI IDs now that we can know the actual VM PID
+ my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
+ eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
+ warn $@ if $@;
+
print "migration listens on $migrate_uri\n" if $migrate_uri;
$res->{migrate_uri} = $migrate_uri;
unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
}
+ my $ids = [];
foreach my $key (keys %$conf) {
next if $key !~ m/^hostpci(\d+)$/;
my $hostpciindex = $1;
foreach my $pci (@{$d->{pciid}}) {
my $pciid = $pci->{id};
+ push @$ids, $pci->{id};
PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
}
}
+ PVE::QemuServer::PCI::remove_pci_reservation($ids);
vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
};
rbd => 1,
};
sub do_snapshots_with_qemu {
- my ($storecfg, $volid) = @_;
+ my ($storecfg, $volid, $deviceid) = @_;
+
+ return if $deviceid =~ m/tpmstate0/;
my $storage_name = PVE::Storage::parse_volume_id($volid);
my $scfg = $storecfg->{ids}->{$storage_name};