use MIME::Base64;
use POSIX;
use Storable qw(dclone);
-use Time::HiRes qw(gettimeofday);
+use Time::HiRes qw(gettimeofday usleep);
use URI::Escape;
use UUID;
use PVE::CGroup;
use PVE::DataCenterConfig;
use PVE::Exception qw(raise raise_param_exc);
+use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
use PVE::INotify;
use PVE::JSONSchema qw(get_standard_option parse_property_string);
use PVE::QemuServer::Cloudinit;
use PVE::QemuServer::CGroup;
use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
-use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom parse_drive print_drive);
+use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
use PVE::QemuServer::Machine;
use PVE::QemuServer::Memory;
use PVE::QemuServer::Monitor qw(mon_cmd);
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
my $OVMF = {
- x86_64 => [
- "$EDK2_FW_BASE/OVMF_CODE.fd",
- "$EDK2_FW_BASE/OVMF_VARS.fd"
- ],
- aarch64 => [
- "$EDK2_FW_BASE/AAVMF_CODE.fd",
- "$EDK2_FW_BASE/AAVMF_VARS.fd"
- ],
+ x86_64 => {
+ '4m' => [
+ "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
+ "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
+ ],
+ '4m-ms' => [
+ "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
+ "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
+ ],
+ default => [
+ "$EDK2_FW_BASE/OVMF_CODE.fd",
+ "$EDK2_FW_BASE/OVMF_VARS.fd",
+ ],
+ },
+ aarch64 => {
+ default => [
+ "$EDK2_FW_BASE/AAVMF_CODE.fd",
+ "$EDK2_FW_BASE/AAVMF_VARS.fd",
+ ],
+ },
};
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
-# Note about locking: we use flock on the config file protect
-# against concurent actions.
-# Aditionaly, we have a 'lock' setting in the config file. This
-# can be set to 'migrate', 'backup', 'snapshot' or 'rollback'. Most actions are not
-# allowed when such lock is set. But you can ignore this kind of
-# lock with the --skiplock flag.
+# Note about locking: we use flock on the config file protect against concurent actions.
+# Aditionaly, we have a 'lock' setting in the config file. This can be set to 'migrate',
+# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such lock is set.
+# But you can ignore this kind of lock with the --skiplock flag.
cfs_register_file('/qemu-server/',
\&parse_vm_config,
my $agent_fmt = {
enabled => {
- description => "Enable/disable Qemu GuestAgent.",
+ description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
type => 'boolean',
default => 0,
default_key => 1,
},
fstrim_cloned_disks => {
- description => "Run fstrim after cloning/moving a disk.",
+ description => "Run fstrim after moving a disk or migrating the VM.",
type => 'boolean',
optional => 1,
default => 0
},
driver => {
type => 'string',
- enum => ['spice'],
+ enum => ['spice', 'none'],
default => 'spice',
optional => 1,
description => "Driver backend for the audio device."
type => 'string',
enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
default_key => 1,
- description => "The file on the host to gather entropy from. In most"
- . " cases /dev/urandom should be preferred over /dev/random"
- . " to avoid entropy-starvation issues on the host. Using"
- . " urandom does *not* decrease security in any meaningful"
- . " way, as it's still seeded from real entropy, and the"
- . " bytes provided will most likely be mixed with real"
- . " entropy on the guest as well. /dev/hwrng can be used"
- . " to pass through a hardware RNG from the host.",
+ description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
+ ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
+ ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
+ ." still seeded from real entropy, and the bytes provided will most likely be mixed"
+ ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
+ ." a hardware RNG from the host.",
},
max_bytes => {
type => 'integer',
- description => "Maximum bytes of entropy injected into the guest every"
- . " 'period' milliseconds. Prefer a lower value when using"
- . " /dev/random as source. Use 0 to disable limiting"
- . " (potentially dangerous!).",
+ description => "Maximum bytes of entropy allowed to get injected into the guest every"
+ ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
+ ." `0` to disable limiting (potentially dangerous!).",
optional => 1,
- # default is 1 KiB/s, provides enough entropy to the guest to avoid
- # boot-starvation issues (e.g. systemd etc...) while allowing no chance
- # of overwhelming the host, provided we're reading from /dev/urandom
+ # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
+ # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
+ # reading from /dev/urandom
default => 1024,
},
period => {
type => 'integer',
- description => "Every 'period' milliseconds the entropy-injection quota"
- . " is reset, allowing the guest to retrieve another"
- . " 'max_bytes' of entropy.",
+ description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
+ ." the guest to retrieve another 'max_bytes' of entropy.",
optional => 1,
default => 1000,
},
hotplug => {
optional => 1,
type => 'string', format => 'pve-hotplug-features',
- description => "Selectively enable hotplug features. This is a comma separated list of hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable hotplug completely. Value '1' is an alias for the default 'network,disk,usb'.",
+ description => "Selectively enable hotplug features. This is a comma separated list of"
+ ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
+ ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
default => 'network,disk,usb',
},
reboot => {
optional => 1,
type => 'number',
description => "Limit of CPU usage.",
- verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
+ verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
+ ." total of '2' CPU time. Value '0' indicates no CPU limit.",
minimum => 0,
maximum => 128,
default => 0,
cpuunits => {
optional => 1,
type => 'integer',
- description => "CPU weight for a VM.",
- verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.",
+ description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
+ verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
+ ." The larger the number is, the more CPU time this VM gets. Number is relative to"
+ ." weights of all the other running VMs.",
minimum => 2,
maximum => 262144,
- default => 1024,
+ default => 'cgroup v1: 1024, cgroup v2: 100',
},
memory => {
optional => 1,
type => 'integer',
- description => "Amount of RAM for the VM in MB. This is the maximum available memory when you use the balloon device.",
+ description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
+ ." you use the balloon device.",
minimum => 16,
default => 512,
},
shares => {
optional => 1,
type => 'integer',
- description => "Amount of memory shares for auto-ballooning. The larger the number is, the more memory this VM gets. Number is relative to weights of all other running VMs. Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
+ description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
+ ." more memory this VM gets. Number is relative to weights of all other running VMs."
+ ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
minimum => 0,
maximum => 50000,
default => 1000,
keyboard => {
optional => 1,
type => 'string',
- description => "Keybord layout for vnc server. Default is read from the '/etc/pve/datacenter.cfg' configuration file.".
- "It should not be necessary to set it.",
+ description => "Keyboard layout for VNC server. The default is read from the"
+ ."'/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
enum => PVE::Tools::kvmkeymaplist(),
default => undef,
},
description => {
optional => 1,
type => 'string',
- description => "Description for the VM. Only used on the configuration web interface. This is saved as comment inside the configuration file.",
+ description => "Description for the VM. Shown in the web-interface VM's summary."
+ ." This is saved as comment inside the configuration file.",
+ maxLength => 1024 * 8,
},
ostype => {
optional => 1,
type => 'string',
- enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 l24 l26 solaris)],
+ enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
description => "Specify guest operating system.",
verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
-win10;; Microsoft Windows 10/2016
+win10;; Microsoft Windows 10/2016/2019
+win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiania kernel
},
agent => {
optional => 1,
- description => "Enable/disable Qemu GuestAgent and its properties.",
+ description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
type => 'string',
format => $agent_fmt,
},
localtime => {
optional => 1,
type => 'boolean',
- description => "Set the real time clock to local time. This is enabled by default if ostype"
- ." indicates a Microsoft OS.",
+ description => "Set the real time clock (RTC) to local time. This is enabled by default if"
+ ." the `ostype` indicates a Microsoft Windows OS.",
},
freeze => {
optional => 1,
description => 'Specifies the cloud-init configuration format. The default depends on the'
.' configured operating system type (`ostype`. We use the `nocloud` format for Linux,'
.' and `configdrive2` for windows.',
- enum => ['configdrive2', 'nocloud'],
+ enum => ['configdrive2', 'nocloud', 'opennebula'],
},
ciuser => {
optional => 1,
$confdesc->{"numa$i"} = $numadesc;
}
-my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
- 'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3',
- 'e1000-82540em', 'e1000-82544gc', 'e1000-82545em'];
+my $nic_model_list = [
+ 'e1000',
+ 'e1000-82540em',
+ 'e1000-82544gc',
+ 'e1000-82545em',
+ 'e1000e',
+ 'i82551',
+ 'i82557b',
+ 'i82559er',
+ 'ne2k_isa',
+ 'ne2k_pci',
+ 'pcnet',
+ 'rtl8139',
+ 'virtio',
+ 'vmxnet3',
+];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
my $net_fmt_bridge_descr = <<__EOD__;
The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
-For IPv6 the special string 'auto' can be used to use stateless autoconfiguration.
+For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
+cloud-init 19.4 or newer.
If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
sub verify_bootdev {
my ($dev, $noerr) = @_;
- return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && $dev !~ m/^efidisk/;
+ my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
+ return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;
my $check = sub {
my ($base) = @_;
return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
}
+my sub get_drive_id {
+ my ($drive) = @_;
+ return "$drive->{interface}$drive->{index}";
+}
+
sub print_drivedevice_full {
my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;
my $device = '';
my $maxdev = 0;
- my $drive_id = "$drive->{interface}$drive->{index}";
+ my $drive_id = get_drive_id($drive);
if ($drive->{interface} eq 'virtio') {
my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
$device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
}
}
- if (!$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/)){
+ if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
$device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
} else {
$device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
}
sub print_drive_commandline_full {
- my ($storecfg, $vmid, $drive) = @_;
+ my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;
my $path;
my $volid = $drive->{file};
- my $format;
+ my $format = $drive->{format};
+ my $drive_id = get_drive_id($drive);
+
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+ my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
if (drive_is_cdrom($drive)) {
$path = get_iso_path($storecfg, $vmid, $volid);
+ die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
} else {
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
if ($storeid) {
$path = PVE::Storage::path($storecfg, $volid);
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
+ $format //= qemu_img_format($scfg, $volname);
} else {
$path = $volid;
- $format = "raw";
+ $format //= "raw";
}
}
+ my $is_rbd = $path =~ m/^rbd:/;
+
my $opts = '';
- my @qemu_drive_options = qw(heads secs cyls trans media format cache rerror werror aio discard);
+ my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
foreach my $o (@qemu_drive_options) {
$opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
}
}
}
- $opts .= ",format=$format" if $format && !$drive->{format};
+ if ($pbs_name) {
+ $format = "rbd" if $is_rbd;
+ die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
+ if !$format;
+ $opts .= ",format=alloc-track,file.driver=$format";
+ } elsif ($format) {
+ $opts .= ",format=$format";
+ }
my $cache_direct = 0;
if (my $cache = $drive->{cache}) {
$cache_direct = $cache =~ /^(?:off|none|directsync)$/;
- } elsif (!drive_is_cdrom($drive)) {
+ } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
$opts .= ",cache=none";
$cache_direct = 1;
}
- # aio native works only with O_DIRECT
+ # io_uring with cache mode writeback or writethrough on krbd will hang...
+ my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;
+
+ # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
+ # sometimes, just plain disable...
+ my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';
+
if (!$drive->{aio}) {
- if($cache_direct) {
- $opts .= ",aio=native";
+ if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
+ # io_uring supports all cache modes
+ $opts .= ",aio=io_uring";
} else {
- $opts .= ",aio=threads";
+ # aio native works only with O_DIRECT
+ if($cache_direct) {
+ $opts .= ",aio=native";
+ } else {
+ $opts .= ",aio=threads";
+ }
}
}
# This used to be our default with discard not being specified:
$detectzeroes = 'on';
}
- $opts .= ",detect-zeroes=$detectzeroes" if $detectzeroes;
+
+ # note: 'detect-zeroes' works per blockdev and we want it to persist
+ # after the alloc-track is removed, so put it on 'file' directly
+ my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
+ $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
+ }
+
+ if ($pbs_name) {
+ $opts .= ",backing=$pbs_name";
+ $opts .= ",auto-remove=on";
}
- my $pathinfo = $path ? "file=$path," : '';
+ # my $file_param = $pbs_name ? "file.file.filename" : "file";
+ my $file_param = "file";
+ if ($pbs_name) {
+ # non-rbd drivers require the underlying file to be a seperate block
+ # node, so add a second .file indirection
+ $file_param .= ".file" if !$is_rbd;
+ $file_param .= ".filename";
+ }
+ my $pathinfo = $path ? "$file_param=$path," : '';
return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
+sub print_pbs_blockdev {
+ my ($pbs_conf, $pbs_name) = @_;
+ my $blockdev = "driver=pbs,node-name=$pbs_name,read-only=on";
+ $blockdev .= ",repository=$pbs_conf->{repository}";
+ $blockdev .= ",snapshot=$pbs_conf->{snapshot}";
+ $blockdev .= ",archive=$pbs_conf->{archive}";
+ $blockdev .= ",keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};
+ return $blockdev;
+}
+
sub print_netdevice_full {
my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;
$romfile = 'pxe-virtio.rom';
} elsif ($device eq 'e1000') {
$romfile = 'pxe-e1000.rom';
+ } elsif ($device eq 'e1000e') {
+ $romfile = 'pxe-e1000e.rom';
} elsif ($device eq 'ne2k') {
$romfile = 'pxe-ne2k_pci.rom';
} elsif ($device eq 'pcnet') {
}
sub destroy_vm {
- my ($storecfg, $vmid, $skiplock, $replacement_conf) = @_;
+ my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;
my $conf = PVE::QemuConfig->load_config($vmid);
if ($conf->{template}) {
# check if any base image is still used by a linked clone
- PVE::QemuConfig->foreach_volume($conf, sub {
+ PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive);
});
}
- # only remove disks owned by this VM
- PVE::QemuConfig->foreach_volume($conf, sub {
+ my $volids = {};
+ my $remove_owned_drive = sub {
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive, 1);
my $volid = $drive->{file};
return if !$volid || $volid =~ m|^/|;
+ return if $volids->{$volid};
my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
return if !$path || !$owner || ($owner != $vmid);
+ $volids->{$volid} = 1;
eval { PVE::Storage::vdisk_free($storecfg, $volid) };
warn "Could not remove disk '$volid', check manually: $@" if $@;
- });
+ };
- # also remove unused disk
- my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid);
- PVE::Storage::foreach_volid($vmdisks, sub {
- my ($volid, $sid, $volname, $d) = @_;
- eval { PVE::Storage::vdisk_free($storecfg, $volid) };
- warn $@ if $@;
- });
+ # only remove disks owned by this VM (referenced in the config)
+ my $include_opts = {
+ include_unused => 1,
+ extra_keys => ['vmstate'],
+ };
+ PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);
+
+ for my $snap (values %{$conf->{snapshots}}) {
+ next if !defined($snap->{vmstate});
+ my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
+ next if !defined($drive);
+ $remove_owned_drive->('vmstate', $drive);
+ }
+
+ PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);
+
+ if ($purge_unreferenced) { # also remove unreferenced disk
+ my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
+ PVE::Storage::foreach_volid($vmdisks, sub {
+ my ($volid, $sid, $volname, $d) = @_;
+ eval { PVE::Storage::vdisk_free($storecfg, $volid) };
+ warn $@ if $@;
+ });
+ }
if (defined $replacement_conf) {
PVE::QemuConfig->write_config($vmid, $replacement_conf);
$conf->{$key} = $value;
}
+ } else {
+ warn "vm $vmid - unable to parse config: $line\n";
}
}
return if !$sid;
# check if storage is available on both nodes
- my $scfg = PVE::Storage::storage_check_node($storecfg, $sid);
- PVE::Storage::storage_check_node($storecfg, $sid, $node);
+ my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
+ PVE::Storage::storage_check_enabled($storecfg, $sid, $node);
+
+ my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);
+
+ die "$volid: content type '$vtype' is not available on storage '$sid'\n"
+ if !$scfg->{content}->{$vtype};
});
}
type => 'string',
optional => 1,
},
+ 'running-machine' => {
+ description => "The currently running machine type (if running).",
+ type => 'string',
+ optional => 1,
+ },
+ 'running-qemu' => {
+ description => "The currently running QEMU version (if running).",
+ type => 'string',
+ optional => 1,
+ },
};
my $last_proc_pid_stat;
my $conf = PVE::QemuConfig->load_config($vmid);
- my $d = { vmid => $vmid };
- $d->{pid} = $list->{$vmid}->{pid};
+ my $d = { vmid => int($vmid) };
+ $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};
# fixme: better status?
$d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';
$d->{diskread} = 0;
$d->{diskwrite} = 0;
- $d->{template} = PVE::QemuConfig->is_template($conf);
+ $d->{template} = 1 if PVE::QemuConfig->is_template($conf);
$d->{serial} = 1 if conf_has_serial($conf);
$d->{lock} = $conf->{lock} if $conf->{lock};
$d->{netin} += $netdev->{$dev}->{transmit};
if ($full) {
- $d->{nics}->{$dev}->{netout} = $netdev->{$dev}->{receive};
- $d->{nics}->{$dev}->{netin} = $netdev->{$dev}->{transmit};
+ $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
+ $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
}
}
$res->{$vmid}->{diskwrite} = $totalwrbytes;
};
+ my $machinecb = sub {
+ my ($vmid, $resp) = @_;
+ my $data = $resp->{'return'} || [];
+
+ $res->{$vmid}->{'running-machine'} =
+ PVE::QemuServer::Machine::current_from_query_machines($data);
+ };
+
+ my $versioncb = sub {
+ my ($vmid, $resp) = @_;
+ my $data = $resp->{'return'} // {};
+ my $version = 'unknown';
+
+ if (my $v = $data->{qemu}) {
+ $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
+ }
+
+ $res->{$vmid}->{'running-qemu'} = $version;
+ };
+
my $statuscb = sub {
my ($vmid, $resp) = @_;
$qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
+ $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
+ $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
# this fails if ballon driver is not loaded, so this must be
# the last commnand (following command are aborted if this fails).
$qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');
return $devs;
}
+sub get_tpm_paths {
+ my ($vmid) = @_;
+ return {
+ socket => "/var/run/qemu-server/$vmid.swtpm",
+ pid => "/var/run/qemu-server/$vmid.swtpm.pid",
+ };
+}
+
+sub add_tpm_device {
+ my ($vmid, $devices, $conf) = @_;
+
+ return if !$conf->{tpmstate0};
+
+ my $paths = get_tpm_paths($vmid);
+
+ push @$devices, "-chardev", "socket,id=tpmchar,path=$paths->{socket}";
+ push @$devices, "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar";
+ push @$devices, "-device", "tpm-tis,tpmdev=tpmdev";
+}
+
+sub start_swtpm {
+ my ($storecfg, $vmid, $tpmdrive, $migration) = @_;
+
+ return if !$tpmdrive;
+
+ my $state;
+ my $tpm = parse_drive("tpmstate0", $tpmdrive);
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+ if ($storeid) {
+ $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
+ } else {
+ $state = $tpm->{file};
+ }
+
+ my $paths = get_tpm_paths($vmid);
+
+ # during migration, we will get state from remote
+ #
+ if (!$migration) {
+ # run swtpm_setup to create a new TPM state if it doesn't exist yet
+ my $setup_cmd = [
+ "swtpm_setup",
+ "--tpmstate",
+ "file://$state",
+ "--createek",
+ "--create-ek-cert",
+ "--create-platform-cert",
+ "--lock-nvram",
+ "--config",
+ "/etc/swtpm_setup.conf", # do not use XDG configs
+ "--runas",
+ "0", # force creation as root, error if not possible
+ "--not-overwrite", # ignore existing state, do not modify
+ ];
+
+ push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
+ # TPM 2.0 supports ECC crypto, use if possible
+ push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';
+
+ run_command($setup_cmd, outfunc => sub {
+ print "swtpm_setup: $1\n";
+ });
+ }
+
+ my $emulator_cmd = [
+ "swtpm",
+ "socket",
+ "--tpmstate",
+ "backend-uri=file://$state,mode=0600",
+ "--ctrl",
+ "type=unixio,path=$paths->{socket},mode=0600",
+ "--pid",
+ "file=$paths->{pid}",
+ "--terminate", # terminate on QEMU disconnect
+ "--daemon",
+ ];
+ push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
+ run_command($emulator_cmd, outfunc => sub { print $1; });
+
+ my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
+ while (! -e $paths->{pid}) {
+ die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
+ usleep(50_000);
+ }
+
+ # return untainted PID of swtpm daemon so it can be killed on error
+ file_read_firstline($paths->{pid}) =~ m/(\d+)/;
+ return $1;
+}
+
sub vga_conf_has_spice {
my ($vga) = @_;
aarch64 => 'virt',
};
+sub get_installed_machine_version {
+ my ($kvmversion) = @_;
+ $kvmversion = kvm_user_version() if !defined($kvmversion);
+ $kvmversion =~ m/^(\d+\.\d+)/;
+ return $1;
+}
+
+sub windows_get_pinned_machine_version {
+ my ($machine, $base_version, $kvmversion) = @_;
+
+ my $pin_version = $base_version;
+ if (!defined($base_version) ||
+ !PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion)
+ ) {
+ $pin_version = get_installed_machine_version($kvmversion);
+ }
+ if (!$machine || $machine eq 'pc') {
+ $machine = "pc-i440fx-$pin_version";
+ } elsif ($machine eq 'q35') {
+ $machine = "pc-q35-$pin_version";
+ } elsif ($machine eq 'virt') {
+ $machine = "virt-$pin_version";
+ } else {
+ warn "unknown machine type '$machine', not touching that!\n";
+ }
+
+ return $machine;
+}
+
sub get_vm_machine {
my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;
my $machine = $forcemachine || $conf->{machine};
if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
+ $kvmversion //= kvm_user_version();
+ # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
+ # layout which confuses windows quite a bit and may result in various regressions..
+ # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
+ if (windows_version($conf->{ostype})) {
+ $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
+ }
$arch //= 'x86_64';
$machine ||= $default_machines->{$arch};
if ($add_pve_version) {
- $kvmversion //= kvm_user_version();
my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
$machine .= "+pve$pvever";
}
}
- if ($add_pve_version && $machine !~ m/\+pve\d+$/) {
+ if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
+ my $is_pxe = $machine =~ m/^(.*?)\.pxe$/;
+ $machine = $1 if $is_pxe;
+
# for version-pinned machines that do not include a pve-version (e.g.
# pc-q35-4.1), we assume 0 to keep them stable in case we bump
$machine .= '+pve0';
+
+ $machine .= '.pxe' if $is_pxe;
}
return $machine;
}
-sub get_ovmf_files($) {
- my ($arch) = @_;
+sub get_ovmf_files($$) {
+ my ($arch, $efidisk) = @_;
- my $ovmf = $OVMF->{$arch}
+ my $types = $OVMF->{$arch}
or die "no OVMF images known for architecture '$arch'\n";
- return @$ovmf;
+ my $type = 'default';
+ if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
+ $type = $efidisk->{'pre-enrolled-keys'} ? "4m-ms" : "4m";
+ }
+
+ return $types->{$type}->@*;
}
my $Arch2Qemu = {
$qemu_cmd,
'-machine', $default_machine,
'-display', 'none',
- '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server,nowait",
+ '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
'-mon', 'chardev=qmp,mode=control',
'-pidfile', $pidfile,
'-S', '-daemonize'
};
my $err = $@;
- # force stop with 10 sec timeout and 'nocheck'
- # always stop, even if QMP failed
+ # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);
die $err if $err;
return \@flags;
}
+my sub get_cpuunits {
+ my ($conf) = @_;
+ return $conf->{cpuunits} // (PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024);
+}
sub config_to_command {
- my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu) = @_;
+ my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
+ $pbs_backing) = @_;
my $cmd = [];
my $globalFlags = [];
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
- my $cpuunits = defined($conf->{cpuunits}) ?
- $conf->{cpuunits} : $defaults->{cpuunits};
+ my $cpuunits = get_cpuunits($conf);
push @$cmd, $kvm_binary;
my $use_virtio = 0;
my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
- push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server,nowait";
+ push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
push @$cmd, '-mon', "chardev=qmp,mode=control";
if (min_version($machine_version, 2, 12)) {
}
if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
- my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch);
+ my $d;
+ if (my $efidisk = $conf->{efidisk0}) {
+ $d = parse_drive('efidisk0', $efidisk);
+ }
+
+ my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d);
die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
my ($path, $format);
- if (my $efidisk = $conf->{efidisk0}) {
- my $d = parse_drive('efidisk0', $efidisk);
+ my $read_only_str = '';
+ if ($d) {
my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
$format = $d->{format};
if ($storeid) {
die "efidisk format must be specified\n"
if !defined($format);
}
+
+ $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
} else {
warn "no efidisk configured! Using temporary efivars disk.\n";
$path = "/tmp/$vmid-ovmf.fd";
$size_str = ",size=" . (-s $ovmf_vars);
}
- push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly,file=$ovmf_code";
- push @$cmd, '-drive', "if=pflash,unit=1,format=$format,id=drive-efidisk0$size_str,file=$path";
+ # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
+ my $cache = "";
+ if ($path =~ m/^rbd:/) {
+ $cache = ',cache=writeback';
+ $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
+ }
+
+ push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
+ push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
}
# load q35 config
if (my $path = $conf->{"serial$i"}) {
if ($path eq 'socket') {
my $socket = "/var/run/qemu-server/${vmid}.serial$i";
- push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server,nowait";
+ push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
# On aarch64, serial0 is the UART device. Qemu only allows
# connecting UART devices via the '-serial' command line, as
# the device has a fixed slot on the hardware...
push @$devices, @$audio_devs;
}
+ add_tpm_device($vmid, $devices, $conf);
+
my $sockets = 1;
$sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
$sockets = $conf->{sockets} if $conf->{sockets};
push @$devices, '-device', print_vga_device(
$conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
- push @$cmd, '-vnc', "unix:$socket,password";
+ push @$cmd, '-vnc', "unix:$socket,password=on";
} else {
push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
push @$cmd, '-nographic';
if ($guest_agent->{enabled}) {
my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
- push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0";
+ push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
my ($ds, $drive) = @_;
if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
+ check_volume_storage_type($storecfg, $drive->{file});
push @$vollist, $drive->{file};
}
# ignore efidisk here, already added in bios/fw handling code above
return if $drive->{interface} eq 'efidisk';
+ # similar for TPM
+ return if $drive->{interface} eq 'tpmstate';
$use_virtio = 1 if $ds =~ m/^virtio/;
$ahcicontroller->{$controller}=1;
}
- my $drive_cmd = print_drive_commandline_full($storecfg, $vmid, $drive);
- $drive_cmd .= ',readonly' if PVE::QemuConfig->is_template($conf);
+ my $pbs_conf = $pbs_backing->{$ds};
+ my $pbs_name = undef;
+ if ($pbs_conf) {
+ $pbs_name = "drive-$ds-pbs";
+ push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
+ }
+
+ my $drive_cmd = print_drive_commandline_full(
+ $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
+
+ # extra protection for templates, but SATA and IDE don't support it..
+ $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
push @$devices, '-drive',$drive_cmd;
push @$devices, '-device', print_drivedevice_full(
print "activating and using '$vmstate' as vmstate\n";
}
+ if (PVE::QemuConfig->is_template($conf)) {
+ # needed to workaround base volumes being read-only
+ push @$cmd, '-snapshot';
+ }
+
# add custom args
if ($conf->{args}) {
my $aa = PVE::Tools::split_args($conf->{args});
die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;
if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
-
qemu_devicedel($vmid, $deviceid);
-
} elsif ($deviceid =~ m/^usb\d+$/) {
-
die "usb hotplug currently not reliable\n";
# when unplugging usb devices this way, there may be remaining usb
# controllers/hubs so we disable it for now
#qemu_devicedel($vmid, $deviceid);
#qemu_devicedelverify($vmid, $deviceid);
-
} elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
+ my $device = parse_drive($deviceid, $conf->{$deviceid});
- qemu_devicedel($vmid, $deviceid);
- qemu_devicedelverify($vmid, $deviceid);
- qemu_drivedel($vmid, $deviceid);
- qemu_iothread_del($conf, $vmid, $deviceid);
-
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_drivedel($vmid, $deviceid);
+ qemu_iothread_del($vmid, $deviceid, $device);
} elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
-
qemu_devicedel($vmid, $deviceid);
qemu_devicedelverify($vmid, $deviceid);
- qemu_iothread_del($conf, $vmid, $deviceid);
-
} elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
+ my $device = parse_drive($deviceid, $conf->{$deviceid});
- qemu_devicedel($vmid, $deviceid);
- qemu_drivedel($vmid, $deviceid);
+ qemu_devicedel($vmid, $deviceid);
+ qemu_drivedel($vmid, $deviceid);
qemu_deletescsihw($conf, $vmid, $deviceid);
+ qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device)
+ if $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
} elsif ($deviceid =~ m/^(net)(\d+)$/) {
-
- qemu_devicedel($vmid, $deviceid);
- qemu_devicedelverify($vmid, $deviceid);
- qemu_netdevdel($vmid, $deviceid);
-
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_netdevdel($vmid, $deviceid);
} else {
die "can't unplug device '$deviceid'\n";
}
}
sub qemu_iothread_add {
- my($vmid, $deviceid, $device) = @_;
+ my ($vmid, $deviceid, $device) = @_;
if ($device->{iothread}) {
my $iothreads = vm_iothreads_list($vmid);
}
sub qemu_iothread_del {
- my($conf, $vmid, $deviceid) = @_;
+ my ($vmid, $deviceid, $device) = @_;
- my $confid = $deviceid;
- if ($deviceid =~ m/^(?:virtioscsi|scsihw)(\d+)$/) {
- $confid = 'scsi' . $1;
- }
- my $device = parse_drive($confid, $conf->{$confid});
if ($device->{iothread}) {
my $iothreads = vm_iothreads_list($vmid);
qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"};
}
sub qemu_objectadd {
- my($vmid, $objectid, $qomtype) = @_;
+ my ($vmid, $objectid, $qomtype) = @_;
mon_cmd($vmid, "object-add", id => $objectid, "qom-type" => $qomtype);
}
sub qemu_objectdel {
- my($vmid, $objectid) = @_;
+ my ($vmid, $objectid) = @_;
mon_cmd($vmid, "object-del", id => $objectid);
sub qemu_driveadd {
my ($storecfg, $vmid, $device) = @_;
- my $drive = print_drive_commandline_full($storecfg, $vmid, $device);
+ my $kvmver = get_running_qemu_version($vmid);
+ my $io_uring = min_version($kvmver, 6, 0);
+ my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
$drive =~ s/\\/\\\\/g;
my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");
}
sub qemu_drivedel {
- my($vmid, $deviceid) = @_;
+ my ($vmid, $deviceid) = @_;
my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
$ret =~ s/^\s+//;
my $scsihwid="$controller_prefix$controller";
my $devices_list = vm_devices_list($vmid);
- if(!defined($devices_list->{$scsihwid})) {
+ if (!defined($devices_list->{$scsihwid})) {
vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);
}
foreach my $opt (keys %{$devices_list}) {
if (is_valid_drivename($opt)) {
my $drive = parse_drive($opt, $conf->{$opt});
- if($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
+ if ($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
return 1;
}
}
my $padding = (1024 - $size % 1024) % 1024;
$size = $size + $padding;
- mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size));
-
+ mon_cmd(
+ $vmid,
+ "block_resize",
+ device => $deviceid,
+ size => int($size),
+ timeout => 60,
+ );
}
sub qemu_volume_snapshot {
my $running = check_running($vmid);
- if ($running && do_snapshots_with_qemu($storecfg, $volid)){
+ if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
} else {
PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
});
}
- if ($running && do_snapshots_with_qemu($storecfg, $volid)){
+ if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
} else {
PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
}
sub set_migration_caps {
- my ($vmid) = @_;
+ my ($vmid, $savevm) = @_;
my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
+ my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
+ my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;
+
my $cap_ref = [];
my $enabled_cap = {
"x-rdma-pin-all" => 0,
"zero-blocks" => 0,
"compress" => 0,
- "dirty-bitmaps" => $qemu_support->{'pbs-dirty-bitmap-migration'} ? 1 : 0,
+ "dirty-bitmaps" => $dirty_bitmaps,
};
my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");
$volhash->{$volid}->{is_vmstate} //= 0;
$volhash->{$volid}->{is_vmstate} = 1 if $key eq 'vmstate';
+ $volhash->{$volid}->{is_tpmstate} //= 0;
+ $volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0';
+
$volhash->{$volid}->{is_unused} //= 0;
$volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
+
+ $volhash->{$volid}->{drivename} = $key if is_valid_drivename($key);
};
my $include_opts = {
die "skip\n" if !$hotplug_features->{memory};
PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
} elsif ($opt eq 'cpuunits') {
- $cgroup->change_cpu_shares(undef, $defaults->{cpuunits});
+ $cgroup->change_cpu_shares(undef, 1024);
} elsif ($opt eq 'cpulimit') {
$cgroup->change_cpu_quota(-1, 100000);
} else {
$conf->{$opt} = delete $conf->{pending}->{$opt};
}
+ my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
+ foreach my $opt (sort keys %$pending_delete_hash) {
+ next if !grep { $_ eq $opt } @cloudinit_opts;
+ PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
+ delete $conf->{$opt};
+ }
+
my $new_conf = { %$conf };
$new_conf->{$key} = $value;
PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
$vmid, $opt, $value, $arch, $machine_type);
} elsif (is_valid_drivename($opt)) {
- die "skip\n" if $opt eq 'efidisk0';
+ die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
# some changes can be done without hotplug
my $drive = parse_drive($opt, $value);
if (drive_is_cloudinit($drive)) {
die "skip\n" if !$hotplug_features->{memory};
$value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
} elsif ($opt eq 'cpuunits') {
- $cgroup->change_cpu_shares($conf->{pending}->{$opt}, $defaults->{cpuunits});
+ $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
} elsif ($opt eq 'cpulimit') {
my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
$cgroup->change_cpu_quota($cpulimit, 100000);
# timeout => in seconds
# paused => start VM in paused state (backup)
# resume => resume from hibernation
+# pbs-backing => {
+# sata0 => {
+# repository
+# snapshot
+# keyfile
+# archive
+# },
+# virtio2 => ...
+# }
# migrate_opts:
# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
# migratedfrom => source node
$conf = PVE::QemuConfig->load_config($vmid); # update/reload
}
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
+ # don't regenerate the ISO if the VM is started as part of a live migration
+ # this way we can reuse the old ISO with the correct config
+ PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;
my $defaults = load_defaults();
print "Resuming suspended VM\n";
}
- my ($cmd, $vollist, $spice_port) =
- config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);
+ my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
+ $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
my $migration_ip;
my $get_migration_ip = sub {
push @$cmd, '-S';
}
- # host pci devices
+ my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
+
+ my $pci_devices = {}; # host pci devices
for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
- my $d = parse_hostpci($conf->{"hostpci$i"});
- next if !$d;
- my $pcidevices = $d->{pciid};
- foreach my $pcidevice (@$pcidevices) {
- my $pciid = $pcidevice->{id};
-
- my $info = PVE::SysFSTools::pci_device_info("$pciid");
- die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support();
- die "no pci device info for device '$pciid'\n" if !$info;
-
- if ($d->{mdev}) {
- my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i);
- PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $d->{mdev});
- } else {
- die "can't unbind/bind PCI group to VFIO '$pciid'\n"
- if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid);
- die "can't reset PCI device '$pciid'\n"
- if $info->{has_fl_reset} && !PVE::SysFSTools::pci_dev_reset($info);
+ my $dev = $conf->{"hostpci$i"} or next;
+ $pci_devices->{$i} = parse_hostpci($dev);
+ }
+
+ my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
+ # reserve all PCI IDs before actually doing anything with them
+ PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);
+
+ eval {
+ for my $id (sort keys %$pci_devices) {
+ my $d = $pci_devices->{$id};
+ for my $dev ($d->{pciid}->@*) {
+ PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
}
- }
+ }
+ };
+ if (my $err = $@) {
+ eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
+ warn $@ if $@;
+ die $err;
}
PVE::Storage::activate_volumes($storecfg, $vollist);
eval {
- run_command(['/bin/systemctl', 'stop', "$vmid.scope"],
- outfunc => sub {}, errfunc => sub {});
+ run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
};
# Issues with the above 'stop' not being fully completed are extremely rare, a very low
# timeout should be more than enough here...
PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);
- my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits}
- : $defaults->{cpuunits};
+ my $cpuunits = get_cpuunits($conf);
- my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
my %run_params = (
timeout => $statefile ? undef : $start_timeout,
umask => 0077,
$run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
}
- my %properties = (
+ my %systemd_properties = (
Slice => 'qemu.slice',
- KillMode => 'none'
+ KillMode => 'process',
+ SendSIGKILL => 0,
+ TimeoutStopUSec => ULONG_MAX, # infinity
);
if (PVE::CGroup::cgroup_mode() == 2) {
- $properties{CPUWeight} = $cpuunits;
+ $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
+ $systemd_properties{CPUWeight} = $cpuunits;
} else {
- $properties{CPUShares} = $cpuunits;
+ $systemd_properties{CPUShares} = $cpuunits;
}
if (my $cpulimit = $conf->{cpulimit}) {
- $properties{CPUQuota} = int($cpulimit * 100);
+ $systemd_properties{CPUQuota} = int($cpulimit * 100);
}
- $properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
+ $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
my $run_qemu = sub {
PVE::Tools::run_fork sub {
- PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties);
+ PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
+
+ my $tpmpid;
+ if (my $tpm = $conf->{tpmstate0}) {
+ # start the TPM emulator so QEMU can connect on start
+ $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
+ }
my $exitcode = run_command($cmd, %run_params);
- die "QEMU exited with code $exitcode\n" if $exitcode;
+ if ($exitcode) {
+ warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
+ kill 'TERM', $tpmpid if $tpmpid;
+ die "QEMU exited with code $exitcode\n";
+ }
};
};
if (my $err = $@) {
# deactivate volumes if start fails
eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
+ eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
+
die "start failed: $err";
}
+ # re-reserve all PCI IDs now that we can know the actual VM PID
+ my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
+ eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
+ warn $@ if $@;
+
print "migration listens on $migrate_uri\n" if $migrate_uri;
$res->{migrate_uri} = $migrate_uri;
if (!$keepActive) {
my $vollist = get_vm_volumes($conf);
PVE::Storage::deactivate_volumes($storecfg, $vollist);
+
+ if (my $tpmdrive = $conf->{tpmstate0}) {
+ my $tpm = parse_drive("tpmstate0", $tpmdrive);
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+ if ($storeid) {
+ PVE::Storage::unmap_volume($storecfg, $tpm->{file});
+ }
+ }
}
foreach my $ext (qw(mon qmp pid vnc qga)) {
unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
}
+ my $ids = [];
foreach my $key (keys %$conf) {
next if $key !~ m/^hostpci(\d+)$/;
my $hostpciindex = $1;
foreach my $pci (@{$d->{pciid}}) {
my $pciid = $pci->{id};
+ push @$ids, $pci->{id};
PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
}
}
+ PVE::QemuServer::PCI::remove_pci_reservation($ids);
vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
};
PVE::Storage::activate_volumes($storecfg, [$vmstate]);
eval {
+ set_migration_caps($vmid, 1);
mon_cmd($vmid, "savevm-start", statefile => $path);
for(;;) {
my $state = mon_cmd($vmid, "query-savevm");
return $map;
};
-my $restore_update_config_line = sub {
- my ($outfd, $cookie, $vmid, $map, $line, $unique) = @_;
+sub restore_update_config_line {
+ my ($cookie, $map, $line, $unique) = @_;
- return if $line =~ m/^\#qmdump\#/;
- return if $line =~ m/^\#vzdump\#/;
- return if $line =~ m/^lock:/;
- return if $line =~ m/^unused\d+:/;
- return if $line =~ m/^parent:/;
+ return '' if $line =~ m/^\#qmdump\#/;
+ return '' if $line =~ m/^\#vzdump\#/;
+ return '' if $line =~ m/^lock:/;
+ return '' if $line =~ m/^unused\d+:/;
+ return '' if $line =~ m/^parent:/;
+
+ my $res = '';
my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
if (($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/)) {
};
my $netstr = print_net($net);
- print $outfd "net$cookie->{netcount}: $netstr\n";
+ $res .= "net$cookie->{netcount}: $netstr\n";
$cookie->{netcount}++;
}
} elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
my $net = parse_net($netstr);
$net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
$netstr = print_net($net);
- print $outfd "$id: $netstr\n";
- } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk)\d+):\s*(\S+)\s*$/) {
+ $res .= "$id: $netstr\n";
+ } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
my $virtdev = $1;
my $value = $3;
my $di = parse_drive($virtdev, $value);
if (defined($di->{backup}) && !$di->{backup}) {
- print $outfd "#$line";
+ $res .= "#$line";
} elsif ($map->{$virtdev}) {
delete $di->{format}; # format can change on restore
$di->{file} = $map->{$virtdev};
$value = print_drive($di);
- print $outfd "$virtdev: $value\n";
+ $res .= "$virtdev: $value\n";
} else {
- print $outfd $line;
+ $res .= $line;
}
} elsif (($line =~ m/^vmgenid: (.*)/)) {
my $vmgenid = $1;
# always generate a new vmgenid if there was a valid one setup
$vmgenid = generate_uuid();
}
- print $outfd "vmgenid: $vmgenid\n";
+ $res .= "vmgenid: $vmgenid\n";
} elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
my ($uuid, $uuid_str);
UUID::generate($uuid);
UUID::unparse($uuid, $uuid_str);
my $smbios1 = parse_smbios1($2);
$smbios1->{uuid} = $uuid_str;
- print $outfd $1.print_smbios1($smbios1)."\n";
+ $res .= $1.print_smbios1($smbios1)."\n";
} else {
- print $outfd $line;
+ $res .= $line;
}
-};
+
+ return $res;
+}
my $restore_deactivate_volumes = sub {
my ($storecfg, $devinfo) = @_;
sub scan_volids {
my ($cfg, $vmid) = @_;
- my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid);
+ my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');
my $volid_hash = {};
foreach my $storeid (keys %$info) {
my $cfg = PVE::Storage::config();
- # FIXME: Remove once our RBD plugin can handle CT and VM on a single storage
- # see: https://pve.proxmox.com/pipermail/pve-devel/2018-July/032900.html
- foreach my $stor (keys %{$cfg->{ids}}) {
- delete($cfg->{ids}->{$stor}) if ! $cfg->{ids}->{$stor}->{content}->{images};
- }
-
print "rescan volumes...\n";
my $volid_hash = scan_volids($cfg, $vmid);
my $repo = PVE::PBSClient::get_repository($scfg);
- # This is only used for `pbs-restore`!
+ # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
local $ENV{PBS_PASSWORD} = $password;
local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);
mkpath $tmpdir;
my $conffile = PVE::QemuConfig->config_file($vmid);
- my $tmpfn = "$conffile.$$.tmp";
# disable interrupts (always do cleanups)
local $SIG{INT} =
local $SIG{TERM} =
# Note: $oldconf is undef if VM does not exists
my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
+ my $new_conf_raw = '';
my $rpcenv = PVE::RPCEnvironment::get();
my $devinfo = {};
my $d = $virtdev_hash->{$virtdev};
next if $d->{is_cloudinit}; # no need to restore cloudinit
+ # this fails if storage is unavailable
my $volid = $d->{volid};
-
my $path = PVE::Storage::path($storecfg, $volid);
- # This is the ONLY user of the PBS_ env vars set on top of this function!
+ # for live-restore we only want to preload the efidisk and TPM state
+ next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
+
my $pbs_restore_cmd = [
'/usr/bin/pbs-restore',
'--repository', $repo,
$fh->seek(0, 0) || die "seek failed - $!\n";
- my $outfd = IO::File->new($tmpfn, "w") || die "unable to write config for VM $vmid\n";
-
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $options->{unique});
+ $new_conf_raw .= restore_update_config_line(
+ $cookie,
+ $map,
+ $line,
+ $options->{unique},
+ );
}
$fh->close();
- $outfd->close();
};
my $err = $@;
- $restore_deactivate_volumes->($storecfg, $devinfo);
+ if ($err || !$options->{live}) {
+ $restore_deactivate_volumes->($storecfg, $devinfo);
+ }
rmtree $tmpdir;
if ($err) {
- unlink $tmpfn;
$restore_destroy_volumes->($storecfg, $devinfo);
die $err;
}
- rename($tmpfn, $conffile) ||
- die "unable to commit configuration file '$conffile'\n";
+ if ($options->{live}) {
+ # keep lock during live-restore
+ $new_conf_raw .= "\nlock: create";
+ }
+
+ PVE::Tools::file_set_contents($conffile, $new_conf_raw);
PVE::Cluster::cfs_update(); # make sure we read new file
eval { rescan($vmid, 1); };
warn $@ if $@;
+
+ PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};
+
+ if ($options->{live}) {
+ # enable interrupts
+ local $SIG{INT} =
+ local $SIG{TERM} =
+ local $SIG{QUIT} =
+ local $SIG{HUP} =
+ local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };
+
+ my $conf = PVE::QemuConfig->load_config($vmid);
+ die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
+
+ # these special drives are already restored before start
+ delete $devinfo->{'drive-efidisk0'};
+ delete $devinfo->{'drive-tpmstate0-backup'};
+ pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
+
+ PVE::QemuConfig->remove_lock($vmid, "create");
+ }
+}
+
+sub pbs_live_restore {
+ my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;
+
+ print "starting VM for live-restore\n";
+ print "repository: '$repo', snapshot: '$snap'\n";
+
+ my $pbs_backing = {};
+ for my $ds (keys %$restored_disks) {
+ $ds =~ m/^drive-(.*)$/;
+ my $confname = $1;
+ $pbs_backing->{$confname} = {
+ repository => $repo,
+ snapshot => $snap,
+ archive => "$ds.img.fidx",
+ };
+ $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;
+
+ my $drive = parse_drive($confname, $conf->{$confname});
+ print "restoring '$ds' to '$drive->{file}'\n";
+ }
+
+ my $drives_streamed = 0;
+ eval {
+ # make sure HA doesn't interrupt our restore by stopping the VM
+ if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
+ run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
+ }
+
+ # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
+ # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
+ vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});
+
+ my $qmeventd_fd = register_qmeventd_handle($vmid);
+
+ # begin streaming, i.e. data copy from PBS to target disk for every vol,
+ # this will effectively collapse the backing image chain consisting of
+ # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
+ # removes itself once all backing images vanish with 'auto-remove=on')
+ my $jobs = {};
+ for my $ds (sort keys %$restored_disks) {
+ my $job_id = "restore-$ds";
+ mon_cmd($vmid, 'block-stream',
+ 'job-id' => $job_id,
+ device => "$ds",
+ );
+ $jobs->{$job_id} = {};
+ }
+
+ mon_cmd($vmid, 'cont');
+ qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');
+
+ print "restore-drive jobs finished successfully, removing all tracking block devices"
+ ." to disconnect from Proxmox Backup Server\n";
+
+ for my $ds (sort keys %$restored_disks) {
+ mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
+ }
+
+ close($qmeventd_fd);
+ };
+
+ my $err = $@;
+
+ if ($err) {
+ warn "An error occured during live-restore: $err\n";
+ _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
+ die "live-restore failed\n";
+ }
}
sub restore_vma_archive {
my $rpcenv = PVE::RPCEnvironment::get();
my $conffile = PVE::QemuConfig->config_file($vmid);
- my $tmpfn = "$conffile.$$.tmp";
# Note: $oldconf is undef if VM does not exist
my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
+ my $new_conf_raw = '';
my %storage_limits;
my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
- foreach my $key (keys %storage_limits) {
- my $limit = PVE::Storage::get_bandwidth_limit('restore', [$key], $bwlimit);
- next if !$limit;
- print STDERR "rate limit for storage $key: $limit KiB/s\n";
- $storage_limits{$key} = $limit * 1024;
+ foreach my $info (values %{$virtdev_hash}) {
+ my $storeid = $info->{storeid};
+ next if defined($storage_limits{$storeid});
+
+ my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
+ print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
+ $storage_limits{$storeid} = $limit * 1024;
}
foreach my $devname (keys %$devinfo) {
$fh->seek(0, 0) || die "seek failed - $!\n";
- my $outfd = IO::File->new($tmpfn, "w") || die "unable to write config for VM $vmid\n";
-
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
+ $new_conf_raw .= restore_update_config_line(
+ $cookie,
+ $map,
+ $line,
+ $opts->{unique},
+ );
}
$fh->close();
- $outfd->close();
};
eval {
rmtree $tmpdir;
if ($err) {
- unlink $tmpfn;
$restore_destroy_volumes->($cfg, $devinfo);
die $err;
}
- rename($tmpfn, $conffile) ||
- die "unable to commit configuration file '$conffile'\n";
+ PVE::Tools::file_set_contents($conffile, $new_conf_raw);
PVE::Cluster::cfs_update(); # make sure we read new file
eval { rescan($vmid, 1); };
warn $@ if $@;
+
+ PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
sub restore_tar_archive {
local $ENV{VZDUMP_USER} = $user;
my $conffile = PVE::QemuConfig->config_file($vmid);
- my $tmpfn = "$conffile.$$.tmp";
+ my $new_conf_raw = '';
# disable interrupts (always do cleanups)
local $SIG{INT} =
my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";
- my $outfd = IO::File->new($tmpfn, "w") || die "unable to write config for VM $vmid\n";
-
my $cookie = { netcount => 0 };
while (defined (my $line = <$srcfd>)) {
- $restore_update_config_line->($outfd, $cookie, $vmid, $map, $line, $opts->{unique});
+ $new_conf_raw .= restore_update_config_line(
+ $cookie,
+ $map,
+ $line,
+ $opts->{unique},
+ );
}
$srcfd->close();
- $outfd->close();
};
if (my $err = $@) {
- unlink $tmpfn;
tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
die $err;
}
rmtree $tmpdir;
- rename $tmpfn, $conffile ||
- die "unable to commit configuration file '$conffile'\n";
+ PVE::Tools::file_set_contents($conffile, $new_conf_raw);
PVE::Cluster::cfs_update(); # make sure we read new file
rbd => 1,
};
sub do_snapshots_with_qemu {
- my ($storecfg, $volid) = @_;
+ my ($storecfg, $volid, $deviceid) = @_;
+
+ return if $deviceid =~ m/tpmstate0/;
my $storage_name = PVE::Storage::parse_volume_id($volid);
my $scfg = $storecfg->{ids}->{$storage_name};
if($line =~ m/\((\S+)\/100\%\)/){
my $percent = $1;
my $transferred = int($size * $percent / 100);
- my $remaining = $size - $transferred;
+ my $total_h = render_bytes($size, 1);
+ my $transferred_h = render_bytes($transferred, 1);
- print "transferred: $transferred bytes remaining: $remaining bytes total: $size bytes progression: $percent %\n";
+ print "transferred $transferred_h of $total_h ($percent%)\n";
}
};
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
+# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
sub qemu_drive_mirror_monitor {
- my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
+ my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;
$completion //= 'complete';
+ $op //= "mirror";
eval {
my $err_complete = 0;
+ my $starttime = time ();
while (1) {
- die "storage migration timed out\n" if $err_complete > 300;
+ die "block job ('$op') timed out\n" if $err_complete > 300;
my $stats = mon_cmd($vmid, "query-block-jobs");
+ my $ctime = time();
- my $running_mirror_jobs = {};
- foreach my $stat (@$stats) {
- next if $stat->{type} ne 'mirror';
- $running_mirror_jobs->{$stat->{device}} = $stat;
+ my $running_jobs = {};
+ for my $stat (@$stats) {
+ next if $stat->{type} ne $op;
+ $running_jobs->{$stat->{device}} = $stat;
}
my $readycounter = 0;
- foreach my $job (keys %$jobs) {
+ for my $job_id (sort keys %$jobs) {
+ my $job = $running_jobs->{$job_id};
- if(defined($jobs->{$job}->{complete}) && !defined($running_mirror_jobs->{$job})) {
- print "$job : finished\n";
- delete $jobs->{$job};
+ my $vanished = !defined($job);
+ my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
+ if($complete || ($vanished && $completion eq 'auto')) {
+ print "$job_id: $op-job finished\n";
+ delete $jobs->{$job_id};
next;
}
- die "$job: mirroring has been cancelled\n" if !defined($running_mirror_jobs->{$job});
+ die "$job_id: '$op' has been cancelled\n" if !defined($job);
- my $busy = $running_mirror_jobs->{$job}->{busy};
- my $ready = $running_mirror_jobs->{$job}->{ready};
- if (my $total = $running_mirror_jobs->{$job}->{len}) {
- my $transferred = $running_mirror_jobs->{$job}->{offset} || 0;
+ my $busy = $job->{busy};
+ my $ready = $job->{ready};
+ if (my $total = $job->{len}) {
+ my $transferred = $job->{offset} || 0;
my $remaining = $total - $transferred;
my $percent = sprintf "%.2f", ($transferred * 100 / $total);
- print "$job: transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
+ my $duration = $ctime - $starttime;
+ my $total_h = render_bytes($total, 1);
+ my $transferred_h = render_bytes($transferred, 1);
+
+ my $status = sprintf(
+ "transferred $transferred_h of $total_h ($percent%%) in %s",
+ render_duration($duration),
+ );
+
+ if ($ready) {
+ if ($busy) {
+ $status .= ", still busy"; # shouldn't even happen? but mirror is weird
+ } else {
+ $status .= ", ready";
+ }
+ }
+ print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
+ $jobs->{$job_id}->{ready} = $ready;
}
- $readycounter++ if $running_mirror_jobs->{$job}->{ready};
+ $readycounter++ if $job->{ready};
}
last if scalar(keys %$jobs) == 0;
if ($readycounter == scalar(keys %$jobs)) {
- print "all mirroring jobs are ready \n";
- last if $completion eq 'skip'; #do the complete later
+ print "all '$op' jobs are ready\n";
+
+ # do the complete later (or has already been done)
+ last if $completion eq 'skip' || $completion eq 'auto';
if ($vmiddst && $vmiddst != $vmid) {
my $agent_running = $qga && qga_check_running($vmid);
last;
} else {
- foreach my $job (keys %$jobs) {
+ for my $job_id (sort keys %$jobs) {
# try to switch the disk if source and destination are on the same guest
- print "$job: Completing block job...\n";
+ print "$job_id: Completing block job_id...\n";
my $op;
if ($completion eq 'complete') {
} else {
die "invalid completion value: $completion\n";
}
- eval { mon_cmd($vmid, $op, device => $job) };
+ eval { mon_cmd($vmid, $op, device => $job_id) };
if ($@ =~ m/cannot be completed/) {
- print "$job: Block job cannot be completed, try again.\n";
+ print "$job_id: block job cannot be completed, trying again.\n";
$err_complete++;
}else {
- print "$job: Completed successfully.\n";
- $jobs->{$job}->{complete} = 1;
+ print "$job_id: Completed successfully.\n";
+ $jobs->{$job_id}->{complete} = 1;
}
}
}
if ($err) {
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
- die "mirroring error: $err";
+ die "block job ($op) error: $err";
}
-
}
sub qemu_blockjobs_cancel {
$size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
} elsif ($drivename eq 'efidisk0') {
$size = get_efivars_size($conf);
+ } elsif ($drivename eq 'tpmstate0') {
+ $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
} else {
($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
}
PVE::Storage::activate_volumes($storecfg, [$newvolid]);
if (drive_is_cloudinit($drive)) {
+ # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
+ # if this is the case, we have to complete any block-jobs still there from
+ # previous drive-mirrors
+ if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
+ qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
+ }
goto no_data_clone;
}
# that is given by the OVMF_VARS.fd
my $src_path = PVE::Storage::path($storecfg, $drive->{file});
my $dst_path = PVE::Storage::path($storecfg, $newvolid);
- run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=1", "count=$size",
+
+ # better for Ceph if block size is not too small, see bug #3324
+ my $bs = 1024*1024;
+
+ run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
"if=$src_path", "of=$dst_path"]);
} else {
qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
}
} else {
+ die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';
+
my $kvmver = get_running_qemu_version ($vmid);
if (!min_version($kvmver, 2, 7)) {
die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
sub get_efivars_size {
my ($conf) = @_;
my $arch = get_vm_arch($conf);
- my (undef, $ovmf_vars) = get_ovmf_files($arch);
+ my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
+ my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk);
die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
return -s $ovmf_vars;
}
return;
}
-sub create_efidisk($$$$$) {
- my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_;
+sub update_tpmstate_size {
+ my ($conf) = @_;
+
+ my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
+ $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
+ $conf->{tpmstate0} = print_drive($disk);
+}
+
+sub create_efidisk($$$$$$) {
+ my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk) = @_;
- my (undef, $ovmf_vars) = get_ovmf_files($arch);
+ my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk);
die "EFI vars default image not found\n" if ! -f $ovmf_vars;
my $vars_size_b = -s $ovmf_vars;
return $bootorder;
}
+sub register_qmeventd_handle {
+ my ($vmid) = @_;
+
+ my $fh;
+ my $peer = "/var/run/qmeventd.sock";
+ my $count = 0;
+
+ for (;;) {
+ $count++;
+ $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
+ last if $fh;
+ if ($! != EINTR && $! != EAGAIN) {
+ die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
+ }
+ if ($count > 4) {
+ die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
+ . "after $count retries\n";
+ }
+ usleep(25000);
+ }
+
+ # send handshake to mark VM as backing up
+ print $fh to_json({vzdump => {vmid => "$vmid"}});
+
+ # return handle to be closed later when inhibit is no longer required
+ return $fh;
+}
+
# bash completion helper
sub complete_backup_archives {
return $res;
}
+sub vm_is_paused {
+ my ($vmid) = @_;
+ my $qmpstatus = eval {
+ PVE::QemuConfig::assert_config_exists_on_node($vmid);
+ mon_cmd($vmid, "query-status");
+ };
+ warn "$@\n" if $@;
+ return $qmpstatus && $qmpstatus->{status} eq "paused";
+}
+
+sub check_volume_storage_type {
+ my ($storecfg, $vol) = @_;
+
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($vol);
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);
+
+ die "storage '$storeid' does not support content-type '$vtype'\n"
+ if !$scfg->{content}->{$vtype};
+
+ return 1;
+}
+
1;