use PVE::QemuServer::Cloudinit;
use PVE::QemuServer::CGroup;
use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
-use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom parse_drive print_drive);
+use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
use PVE::QemuServer::Machine;
use PVE::QemuServer::Memory;
use PVE::QemuServer::Monitor qw(mon_cmd);
cpuunits => {
optional => 1,
type => 'integer',
- description => "CPU weight for a VM.",
- verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.",
+ description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
+ verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
+ ." The larger the number is, the more CPU time this VM gets. Number is relative to"
+ ." weights of all the other running VMs.",
minimum => 2,
maximum => 262144,
- default => 1024,
+ default => 'cgroup v1: 1024, cgroup v2: 100',
},
memory => {
optional => 1,
description => {
optional => 1,
type => 'string',
- description => "Description for the VM. Only used on the configuration web interface. This is saved as comment inside the configuration file.",
+ description => "Description for the VM. Shown in the web-interface VM's summary."
+ ." This is saved as comment inside the configuration file.",
+ maxLength => 1024 * 8,
},
ostype => {
optional => 1,
$confdesc->{"numa$i"} = $numadesc;
}
-my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio',
- 'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3',
- 'e1000-82540em', 'e1000-82544gc', 'e1000-82545em'];
+my $nic_model_list = [
+ 'e1000',
+ 'e1000-82540em',
+ 'e1000-82544gc',
+ 'e1000-82545em',
+ 'e1000e',
+ 'i82551',
+ 'i82557b',
+ 'i82559er',
+ 'ne2k_isa',
+ 'ne2k_pci',
+ 'pcnet',
+ 'rtl8139',
+ 'virtio',
+ 'vmxnet3',
+];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
my $net_fmt_bridge_descr = <<__EOD__;
sub verify_bootdev {
my ($dev, $noerr) = @_;
- return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && $dev !~ m/^efidisk/;
+ my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
+ return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;
my $check = sub {
my ($base) = @_;
}
}
- if (!$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/)){
+ if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
$device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
} else {
$device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
}
sub print_drive_commandline_full {
- my ($storecfg, $vmid, $drive, $pbs_name) = @_;
+ my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;
my $path;
my $volid = $drive->{file};
my $format = $drive->{format};
my $drive_id = get_drive_id($drive);
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+ my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
+
if (drive_is_cdrom($drive)) {
$path = get_iso_path($storecfg, $vmid, $volid);
die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
} else {
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
if ($storeid) {
$path = PVE::Storage::path($storecfg, $volid);
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
$format //= qemu_img_format($scfg, $volname);
} else {
$path = $volid;
if (my $cache = $drive->{cache}) {
$cache_direct = $cache =~ /^(?:off|none|directsync)$/;
- } elsif (!drive_is_cdrom($drive)) {
+ } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
$opts .= ",cache=none";
$cache_direct = 1;
}
- # aio native works only with O_DIRECT
+ # io_uring with cache mode writeback or writethrough on krbd will hang...
+ my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;
+
+ # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
+ # sometimes, just plain disable...
+ my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';
+
if (!$drive->{aio}) {
- if($cache_direct) {
- $opts .= ",aio=native";
+ if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
+ # io_uring supports all cache modes
+ $opts .= ",aio=io_uring";
} else {
- $opts .= ",aio=threads";
+ # aio native works only with O_DIRECT
+ if($cache_direct) {
+ $opts .= ",aio=native";
+ } else {
+ $opts .= ",aio=threads";
+ }
}
}
$romfile = 'pxe-virtio.rom';
} elsif ($device eq 'e1000') {
$romfile = 'pxe-e1000.rom';
+ } elsif ($device eq 'e1000e') {
+ $romfile = 'pxe-e1000e.rom';
} elsif ($device eq 'ne2k') {
$romfile = 'pxe-ne2k_pci.rom';
} elsif ($device eq 'pcnet') {
if ($conf->{template}) {
# check if any base image is still used by a linked clone
- PVE::QemuConfig->foreach_volume($conf, sub {
+ PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive);
});
}
- # only remove disks owned by this VM (referenced in the config)
- PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
+ my $volids = {};
+ my $remove_owned_drive = sub {
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive, 1);
my $volid = $drive->{file};
return if !$volid || $volid =~ m|^/|;
+ return if $volids->{$volid};
my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
return if !$path || !$owner || ($owner != $vmid);
+ $volids->{$volid} = 1;
eval { PVE::Storage::vdisk_free($storecfg, $volid) };
warn "Could not remove disk '$volid', check manually: $@" if $@;
- });
+ };
+
+ # only remove disks owned by this VM (referenced in the config)
+ my $include_opts = {
+ include_unused => 1,
+ extra_keys => ['vmstate'],
+ };
+ PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);
+
+ for my $snap (values %{$conf->{snapshots}}) {
+ next if !defined($snap->{vmstate});
+ my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
+ next if !defined($drive);
+ $remove_owned_drive->('vmstate', $drive);
+ }
+
+ PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);
if ($purge_unreferenced) { # also remove unreferenced disk
- my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid);
+ my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
PVE::Storage::foreach_volid($vmdisks, sub {
my ($volid, $sid, $volname, $d) = @_;
eval { PVE::Storage::vdisk_free($storecfg, $volid) };
return if !$sid;
# check if storage is available on both nodes
- my $scfg = PVE::Storage::storage_check_node($storecfg, $sid);
- PVE::Storage::storage_check_node($storecfg, $sid, $node);
+ my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
+ PVE::Storage::storage_check_enabled($storecfg, $sid, $node);
+
+ my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);
+
+ die "$volid: content type '$vtype' is not available on storage '$sid'\n"
+ if !$scfg->{content}->{$vtype};
});
}
my $conf = PVE::QemuConfig->load_config($vmid);
- my $d = { vmid => $vmid };
- $d->{pid} = $list->{$vmid}->{pid};
+ my $d = { vmid => int($vmid) };
+ $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};
# fixme: better status?
$d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';
$d->{diskread} = 0;
$d->{diskwrite} = 0;
- $d->{template} = PVE::QemuConfig->is_template($conf);
+ $d->{template} = 1 if PVE::QemuConfig->is_template($conf);
$d->{serial} = 1 if conf_has_serial($conf);
$d->{lock} = $conf->{lock} if $conf->{lock};
$d->{netin} += $netdev->{$dev}->{transmit};
if ($full) {
- $d->{nics}->{$dev}->{netout} = $netdev->{$dev}->{receive};
- $d->{nics}->{$dev}->{netin} = $netdev->{$dev}->{transmit};
+ $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
+ $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
}
}
return $devs;
}
+# Build the per-VM file locations used for the swtpm instance.
+#
+# Takes the VM ID and returns a hash reference with two entries:
+#   socket - path of the UNIX socket shared between swtpm and QEMU
+#   pid    - path of the swtpm PID file
+sub get_tpm_paths {
+    my ($vmid) = @_;
+
+    my %paths = (
+        socket => "/var/run/qemu-server/$vmid.swtpm",
+        pid => "/var/run/qemu-server/$vmid.swtpm.pid",
+    );
+
+    return \%paths;
+}
+
+# Append the QEMU command line arguments for an emulated TPM to @$devices.
+#
+# Does nothing unless $conf contains a 'tpmstate0' entry. The chardev is
+# pointed at the socket path that get_tpm_paths() reports for $vmid.
+sub add_tpm_device {
+    my ($vmid, $devices, $conf) = @_;
+
+    return if !$conf->{tpmstate0};
+
+    my $paths = get_tpm_paths($vmid);
+
+    # chardev -> tpmdev -> device, each referencing the previous one by id
+    push @$devices,
+        "-chardev", "socket,id=tpmchar,path=$paths->{socket}",
+        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
+        "-device", "tpm-tis,tpmdev=tpmdev";
+}
+
+# Start the swtpm emulator that backs the TPM device of a VM.
+#
+# Parameters:
+#   $storecfg  - storage configuration, handed on to PVE::Storage
+#   $vmid      - VM ID, used to derive the socket and PID file paths
+#   $tpmdrive  - the 'tpmstate0' property string from the VM configuration
+#   $migration - when true, swtpm_setup is skipped (see below)
+#
+# Returns the untainted PID read from the swtpm PID file so the caller can
+# kill the daemon on error. Returns nothing if $tpmdrive is empty, and undef
+# if the PID file does not contain a number.
+sub start_swtpm {
+    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;
+
+    return if !$tpmdrive;
+
+    my $state;
+    my $tpm = parse_drive("tpmstate0", $tpmdrive);
+    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+    if ($storeid) {
+        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
+    } else {
+        # not a storage volume ID, use the configured value as-is
+        $state = $tpm->{file};
+    }
+
+    my $paths = get_tpm_paths($vmid);
+
+    # evaluate once; '// ""' avoids an 'uninitialized' warning if no version is set
+    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';
+
+    # during migration, we will get state from remote
+    if (!$migration) {
+        # run swtpm_setup to create a new TPM state if it doesn't exist yet
+        my $setup_cmd = [
+            "swtpm_setup",
+            "--tpmstate",
+            "file://$state",
+            "--createek",
+            "--create-ek-cert",
+            "--create-platform-cert",
+            "--lock-nvram",
+            "--config",
+            "/etc/swtpm_setup.conf", # do not use XDG configs
+            "--runas",
+            "0", # force creation as root, error if not possible
+            "--not-overwrite", # ignore existing state, do not modify
+        ];
+
+        # TPM 2.0 supports ECC crypto, use if possible
+        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;
+
+        run_command($setup_cmd, outfunc => sub {
+            # use the line handed to the callback; $1 would only be whatever
+            # capture happens to be left over from the caller's last match
+            my ($line) = @_;
+            print "swtpm_setup: $line\n";
+        });
+    }
+
+    my $emulator_cmd = [
+        "swtpm",
+        "socket",
+        "--tpmstate",
+        "backend-uri=file://$state,mode=0600",
+        "--ctrl",
+        "type=unixio,path=$paths->{socket},mode=0600",
+        "--pid",
+        "file=$paths->{pid}",
+        "--terminate", # terminate on QEMU disconnect
+        "--daemon",
+    ];
+    push @$emulator_cmd, "--tpm2" if $is_tpm2;
+    run_command($emulator_cmd, outfunc => sub {
+        my ($line) = @_;
+        # newline-terminated like the swtpm_setup output above
+        print "$line\n";
+    });
+
+    # return untainted PID of swtpm daemon so it can be killed on error;
+    # capture in list context so a failed match yields undef, not a stale $1
+    my $pidline = file_read_firstline($paths->{pid}) // '';
+    my ($pid) = $pidline =~ m/(\d+)/;
+    return $pid;
+}
+
sub vga_conf_has_spice {
my ($vga) = @_;
$qemu_cmd,
'-machine', $default_machine,
'-display', 'none',
- '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server,nowait",
+ '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
'-mon', 'chardev=qmp,mode=control',
'-pidfile', $pidfile,
'-S', '-daemonize'
return \@flags;
}
+# Effective CPU weight for a VM config: the configured 'cpuunits' if it is
+# defined, otherwise 100 when PVE::CGroup::cgroup_mode() reports 2 and 1024
+# in every other case.
+my sub get_cpuunits {
+    my ($conf) = @_;
+
+    my $configured = $conf->{cpuunits};
+    return $configured if defined($configured);
+
+    return 100 if PVE::CGroup::cgroup_mode() == 2;
+    return 1024;
+}
sub config_to_command {
my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
$pbs_backing) = @_;
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
- my $cpuunits = defined($conf->{cpuunits}) ?
- $conf->{cpuunits} : $defaults->{cpuunits};
+ my $cpuunits = get_cpuunits($conf);
push @$cmd, $kvm_binary;
my $use_virtio = 0;
my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
- push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server,nowait";
+ push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
push @$cmd, '-mon', "chardev=qmp,mode=control";
if (min_version($machine_version, 2, 12)) {
die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
my ($path, $format);
+ my $read_only_str = '';
if (my $efidisk = $conf->{efidisk0}) {
my $d = parse_drive('efidisk0', $efidisk);
my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
die "efidisk format must be specified\n"
if !defined($format);
}
+
+ $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
} else {
warn "no efidisk configured! Using temporary efivars disk.\n";
$path = "/tmp/$vmid-ovmf.fd";
$size_str = ",size=" . (-s $ovmf_vars);
}
- push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly,file=$ovmf_code";
- push @$cmd, '-drive', "if=pflash,unit=1,format=$format,id=drive-efidisk0$size_str,file=$path";
+ # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
+ my $cache = "";
+ if ($path =~ m/^rbd:/) {
+ $cache = ',cache=writeback';
+ $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
+ }
+
+ push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
+ push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
}
# load q35 config
if (my $path = $conf->{"serial$i"}) {
if ($path eq 'socket') {
my $socket = "/var/run/qemu-server/${vmid}.serial$i";
- push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server,nowait";
+ push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
# On aarch64, serial0 is the UART device. Qemu only allows
# connecting UART devices via the '-serial' command line, as
# the device has a fixed slot on the hardware...
push @$devices, @$audio_devs;
}
+ add_tpm_device($vmid, $devices, $conf);
+
my $sockets = 1;
$sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
$sockets = $conf->{sockets} if $conf->{sockets};
push @$devices, '-device', print_vga_device(
$conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
- push @$cmd, '-vnc', "unix:$socket,password";
+ push @$cmd, '-vnc', "unix:$socket,password=on";
} else {
push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
push @$cmd, '-nographic';
if ($guest_agent->{enabled}) {
my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
- push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0";
+ push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
my ($ds, $drive) = @_;
if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
+ check_volume_storage_type($storecfg, $drive->{file});
push @$vollist, $drive->{file};
}
# ignore efidisk here, already added in bios/fw handling code above
return if $drive->{interface} eq 'efidisk';
+ # similar for TPM
+ return if $drive->{interface} eq 'tpmstate';
$use_virtio = 1 if $ds =~ m/^virtio/;
push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
}
- my $drive_cmd = print_drive_commandline_full($storecfg, $vmid, $drive, $pbs_name);
- $drive_cmd .= ',readonly' if PVE::QemuConfig->is_template($conf);
+ my $drive_cmd = print_drive_commandline_full(
+ $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
+
+ # extra protection for templates, but SATA and IDE don't support it..
+ $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
push @$devices, '-drive',$drive_cmd;
push @$devices, '-device', print_drivedevice_full(
print "activating and using '$vmstate' as vmstate\n";
}
+ if (PVE::QemuConfig->is_template($conf)) {
+ # needed to workaround base volumes being read-only
+ push @$cmd, '-snapshot';
+ }
+
# add custom args
if ($conf->{args}) {
my $aa = PVE::Tools::split_args($conf->{args});
die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;
if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
-
qemu_devicedel($vmid, $deviceid);
-
} elsif ($deviceid =~ m/^usb\d+$/) {
-
die "usb hotplug currently not reliable\n";
# when unplugging usb devices this way, there may be remaining usb
# controllers/hubs so we disable it for now
#qemu_devicedel($vmid, $deviceid);
#qemu_devicedelverify($vmid, $deviceid);
-
} elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
+ my $device = parse_drive($deviceid, $conf->{$deviceid});
- qemu_devicedel($vmid, $deviceid);
- qemu_devicedelverify($vmid, $deviceid);
- qemu_drivedel($vmid, $deviceid);
- qemu_iothread_del($conf, $vmid, $deviceid);
-
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_drivedel($vmid, $deviceid);
+ qemu_iothread_del($vmid, $deviceid, $device);
} elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
-
qemu_devicedel($vmid, $deviceid);
qemu_devicedelverify($vmid, $deviceid);
- qemu_iothread_del($conf, $vmid, $deviceid);
-
} elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
+ my $device = parse_drive($deviceid, $conf->{$deviceid});
- qemu_devicedel($vmid, $deviceid);
- qemu_drivedel($vmid, $deviceid);
+ qemu_devicedel($vmid, $deviceid);
+ qemu_drivedel($vmid, $deviceid);
qemu_deletescsihw($conf, $vmid, $deviceid);
+ qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device)
+ if $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
} elsif ($deviceid =~ m/^(net)(\d+)$/) {
-
- qemu_devicedel($vmid, $deviceid);
- qemu_devicedelverify($vmid, $deviceid);
- qemu_netdevdel($vmid, $deviceid);
-
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
+ qemu_netdevdel($vmid, $deviceid);
} else {
die "can't unplug device '$deviceid'\n";
}
}
sub qemu_iothread_add {
- my($vmid, $deviceid, $device) = @_;
+ my ($vmid, $deviceid, $device) = @_;
if ($device->{iothread}) {
my $iothreads = vm_iothreads_list($vmid);
}
sub qemu_iothread_del {
- my($conf, $vmid, $deviceid) = @_;
+ my ($vmid, $deviceid, $device) = @_;
- my $confid = $deviceid;
- if ($deviceid =~ m/^(?:virtioscsi|scsihw)(\d+)$/) {
- $confid = 'scsi' . $1;
- }
- my $device = parse_drive($confid, $conf->{$confid});
if ($device->{iothread}) {
my $iothreads = vm_iothreads_list($vmid);
qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"};
}
sub qemu_objectadd {
- my($vmid, $objectid, $qomtype) = @_;
+ my ($vmid, $objectid, $qomtype) = @_;
mon_cmd($vmid, "object-add", id => $objectid, "qom-type" => $qomtype);
}
sub qemu_objectdel {
- my($vmid, $objectid) = @_;
+ my ($vmid, $objectid) = @_;
mon_cmd($vmid, "object-del", id => $objectid);
sub qemu_driveadd {
my ($storecfg, $vmid, $device) = @_;
- my $drive = print_drive_commandline_full($storecfg, $vmid, $device);
+ my $kvmver = get_running_qemu_version($vmid);
+ my $io_uring = min_version($kvmver, 6, 0);
+ my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
$drive =~ s/\\/\\\\/g;
my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");
}
sub qemu_drivedel {
- my($vmid, $deviceid) = @_;
+ my ($vmid, $deviceid) = @_;
my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
$ret =~ s/^\s+//;
my $scsihwid="$controller_prefix$controller";
my $devices_list = vm_devices_list($vmid);
- if(!defined($devices_list->{$scsihwid})) {
+ if (!defined($devices_list->{$scsihwid})) {
vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);
}
foreach my $opt (keys %{$devices_list}) {
if (is_valid_drivename($opt)) {
my $drive = parse_drive($opt, $conf->{$opt});
- if($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
+ if ($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
return 1;
}
}
$volhash->{$volid}->{is_vmstate} //= 0;
$volhash->{$volid}->{is_vmstate} = 1 if $key eq 'vmstate';
+ $volhash->{$volid}->{is_tpmstate} //= 0;
+ $volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0';
+
$volhash->{$volid}->{is_unused} //= 0;
$volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
+
+ $volhash->{$volid}->{drivename} = $key if is_valid_drivename($key);
};
my $include_opts = {
die "skip\n" if !$hotplug_features->{memory};
PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
} elsif ($opt eq 'cpuunits') {
- $cgroup->change_cpu_shares(undef, $defaults->{cpuunits});
+ $cgroup->change_cpu_shares(undef, 1024);
} elsif ($opt eq 'cpulimit') {
$cgroup->change_cpu_quota(-1, 100000);
} else {
vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
$vmid, $opt, $value, $arch, $machine_type);
} elsif (is_valid_drivename($opt)) {
- die "skip\n" if $opt eq 'efidisk0';
+ die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
# some changes can be done without hotplug
my $drive = parse_drive($opt, $value);
if (drive_is_cloudinit($drive)) {
die "skip\n" if !$hotplug_features->{memory};
$value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
} elsif ($opt eq 'cpuunits') {
- $cgroup->change_cpu_shares($conf->{pending}->{$opt}, $defaults->{cpuunits});
+ $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
} elsif ($opt eq 'cpulimit') {
my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
$cgroup->change_cpu_quota($cpulimit, 100000);
$conf = PVE::QemuConfig->load_config($vmid); # update/reload
}
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
+ # don't regenerate the ISO if the VM is started as part of a live migration
+ # this way we can reuse the old ISO with the correct config
+ PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;
my $defaults = load_defaults();
# timeout should be more than enough here...
PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);
- my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits}
- : $defaults->{cpuunits};
+ my $cpuunits = get_cpuunits($conf);
my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
my %run_params = (
my %properties = (
Slice => 'qemu.slice',
- KillMode => 'none'
+ KillMode => 'process',
+ SendSIGKILL => 0,
+ TimeoutStopUSec => ULONG_MAX, # infinity
);
if (PVE::CGroup::cgroup_mode() == 2) {
+ $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
$properties{CPUWeight} = $cpuunits;
} else {
$properties{CPUShares} = $cpuunits;
PVE::Tools::run_fork sub {
PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties);
+ my $tpmpid;
+ if (my $tpm = $conf->{tpmstate0}) {
+ # start the TPM emulator so QEMU can connect on start
+ $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
+ }
+
my $exitcode = run_command($cmd, %run_params);
- die "QEMU exited with code $exitcode\n" if $exitcode;
+ if ($exitcode) {
+ kill 'TERM', $tpmpid if $tpmpid;
+ die "QEMU exited with code $exitcode\n";
+ }
};
};
if (!$keepActive) {
my $vollist = get_vm_volumes($conf);
PVE::Storage::deactivate_volumes($storecfg, $vollist);
+
+ if (my $tpmdrive = $conf->{tpmstate0}) {
+ my $tpm = parse_drive("tpmstate0", $tpmdrive);
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+ if ($storeid) {
+ PVE::Storage::unmap_volume($storecfg, $tpm->{file});
+ }
+ }
}
foreach my $ext (qw(mon qmp pid vnc qga)) {
return $map;
};
-my $restore_update_config_line = sub {
- my ($cookie, $vmid, $map, $line, $unique) = @_;
+sub restore_update_config_line {
+ my ($cookie, $map, $line, $unique) = @_;
return '' if $line =~ m/^\#qmdump\#/;
return '' if $line =~ m/^\#vzdump\#/;
$net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
$netstr = print_net($net);
$res .= "$id: $netstr\n";
- } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk)\d+):\s*(\S+)\s*$/) {
+ } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
my $virtdev = $1;
my $value = $3;
my $di = parse_drive($virtdev, $value);
}
return $res;
-};
+}
my $restore_deactivate_volumes = sub {
my ($storecfg, $devinfo) = @_;
sub scan_volids {
my ($cfg, $vmid) = @_;
- my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid);
+ my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');
my $volid_hash = {};
foreach my $storeid (keys %$info) {
my $cfg = PVE::Storage::config();
- # FIXME: Remove once our RBD plugin can handle CT and VM on a single storage
- # see: https://pve.proxmox.com/pipermail/pve-devel/2018-July/032900.html
- foreach my $stor (keys %{$cfg->{ids}}) {
- delete($cfg->{ids}->{$stor}) if ! $cfg->{ids}->{$stor}->{content}->{images};
- }
-
print "rescan volumes...\n";
my $volid_hash = scan_volids($cfg, $vmid);
# allocate volumes
my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);
- if (!$options->{live}) {
- foreach my $virtdev (sort keys %$virtdev_hash) {
- my $d = $virtdev_hash->{$virtdev};
- next if $d->{is_cloudinit}; # no need to restore cloudinit
-
- my $volid = $d->{volid};
+ foreach my $virtdev (sort keys %$virtdev_hash) {
+ my $d = $virtdev_hash->{$virtdev};
+ next if $d->{is_cloudinit}; # no need to restore cloudinit
- my $path = PVE::Storage::path($storecfg, $volid);
+ # this fails if storage is unavailable
+ my $volid = $d->{volid};
+ my $path = PVE::Storage::path($storecfg, $volid);
- my $pbs_restore_cmd = [
- '/usr/bin/pbs-restore',
- '--repository', $repo,
- $pbs_backup_name,
- "$d->{devname}.img.fidx",
- $path,
- '--verbose',
- ];
+ # for live-restore we only want to preload the efidisk and TPM state
+ next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
- push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
- push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
+ my $pbs_restore_cmd = [
+ '/usr/bin/pbs-restore',
+ '--repository', $repo,
+ $pbs_backup_name,
+ "$d->{devname}.img.fidx",
+ $path,
+ '--verbose',
+ ];
- if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
- push @$pbs_restore_cmd, '--skip-zero';
- }
+ push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
+ push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
- my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
- print "restore proxmox backup image: $dbg_cmdstring\n";
- run_command($pbs_restore_cmd);
+ if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
+ push @$pbs_restore_cmd, '--skip-zero';
}
+
+ my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
+ print "restore proxmox backup image: $dbg_cmdstring\n";
+ run_command($pbs_restore_cmd);
}
$fh->seek(0, 0) || die "seek failed - $!\n";
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $new_conf_raw .= $restore_update_config_line->(
+ $new_conf_raw .= restore_update_config_line(
$cookie,
- $vmid,
$map,
$line,
$options->{unique},
die $err;
}
+ if ($options->{live}) {
+ # keep lock during live-restore
+ $new_conf_raw .= "\nlock: create";
+ }
+
PVE::Tools::file_set_contents($conffile, $new_conf_raw);
PVE::Cluster::cfs_update(); # make sure we read new file
PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};
if ($options->{live}) {
- eval {
- # enable interrupts
- local $SIG{INT} =
- local $SIG{TERM} =
- local $SIG{QUIT} =
- local $SIG{HUP} =
- local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };
+ # enable interrupts
+ local $SIG{INT} =
+ local $SIG{TERM} =
+ local $SIG{QUIT} =
+ local $SIG{HUP} =
+ local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };
- my $conf = PVE::QemuConfig->load_config($vmid);
- die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
+ my $conf = PVE::QemuConfig->load_config($vmid);
+ die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
- pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
- };
+ # these special drives are already restored before start
+ delete $devinfo->{'drive-efidisk0'};
+ delete $devinfo->{'drive-tpmstate0-backup'};
+ pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
- $err = $@;
- if ($err) {
- warn "destroying partially live-restored VM, all temporary data will be lost!\n";
- $restore_deactivate_volumes->($storecfg, $devinfo);
- $restore_destroy_volumes->($storecfg, $devinfo);
- PVE::QemuConfig->destroy_config($vmid);
- die $err;
- }
+ PVE::QemuConfig->remove_lock($vmid, "create");
}
}
sub pbs_live_restore {
my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;
- print "Starting VM for live-restore\n";
+ print "starting VM for live-restore\n";
+ print "repository: '$repo', snapshot: '$snap'\n";
my $pbs_backing = {};
for my $ds (keys %$restored_disks) {
$ds =~ m/^drive-(.*)$/;
- $pbs_backing->{$1} = {
+ my $confname = $1;
+ $pbs_backing->{$confname} = {
repository => $repo,
snapshot => $snap,
archive => "$ds.img.fidx",
};
- $pbs_backing->{$1}->{keyfile} = $keyfile if -e $keyfile;
+ $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;
+
+ my $drive = parse_drive($confname, $conf->{$confname});
+ print "restoring '$ds' to '$drive->{file}'\n";
}
my $drives_streamed = 0;
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $new_conf_raw .= $restore_update_config_line->(
+ $new_conf_raw .= restore_update_config_line(
$cookie,
- $vmid,
$map,
$line,
$opts->{unique},
my $cookie = { netcount => 0 };
while (defined (my $line = <$srcfd>)) {
- $new_conf_raw .= $restore_update_config_line->(
+ $new_conf_raw .= restore_update_config_line(
$cookie,
- $vmid,
$map,
$line,
$opts->{unique},
my $total_h = render_bytes($size, 1);
my $transferred_h = render_bytes($transferred, 1);
- print "transferred $transferred_h of $total_h ($percent%)";
+ print "transferred $transferred_h of $total_h ($percent%)\n";
}
};
$size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
} elsif ($drivename eq 'efidisk0') {
$size = get_efivars_size($conf);
+ } elsif ($drivename eq 'tpmstate0') {
+ $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
} else {
($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
}
}
} else {
+ die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';
+
my $kvmver = get_running_qemu_version ($vmid);
if (!min_version($kvmver, 2, 7)) {
die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
return;
}
+# Rewrite the 'tpmstate0' entry of $conf in place so that its size is
+# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE. Returns the new property string.
+sub update_tpmstate_size {
+    my ($conf) = @_;
+
+    my $tpmstate = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
+    $tpmstate->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
+
+    my $updated = print_drive($tpmstate);
+    $conf->{tpmstate0} = $updated;
+}
+
sub create_efidisk($$$$$) {
my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_;
return $qmpstatus && $qmpstatus->{status} eq "paused";
}
+# Verify that the storage holding volume $vol has the volume's content type
+# enabled in its 'content' setting.
+#
+# Returns 1 on success and dies with a message naming the storage and the
+# content type otherwise.
+sub check_volume_storage_type {
+    my ($storecfg, $vol) = @_;
+
+    my ($storeid) = PVE::Storage::parse_volume_id($vol);
+    my $storage = PVE::Storage::storage_config($storecfg, $storeid);
+    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);
+
+    if ($storage->{content}->{$vtype}) {
+        return 1;
+    }
+
+    die "storage '$storeid' does not support content-type '$vtype'\n";
+}
+
1;