use IO::Socket::UNIX;
use IPC::Open3;
use JSON;
+use List::Util qw(first);
use MIME::Base64;
use POSIX;
use Storable qw(dclone);
use PVE::QemuServer::Memory;
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
-use PVE::QemuServer::USB qw(parse_usb_device);
+use PVE::QemuServer::USB;
my $have_sdn;
eval {
});
PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
- description => "Specifies the Qemu machine type.",
+ description => "Specifies the QEMU machine type.",
type => 'string',
pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
maxLength => 40,
my $agent_fmt = {
enabled => {
- description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
+ description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
type => 'boolean',
default => 0,
default_key => 1,
description => "Run fstrim after moving a disk or migrating the VM.",
type => 'boolean',
optional => 1,
- default => 0
+ default => 0,
+ },
+ 'freeze-fs-on-backup' => {
+ description => "Freeze/thaw guest filesystems on backup for consistency.",
+ type => 'boolean',
+ optional => 1,
+ default => 1,
},
type => {
description => "Select the agent type",
memory => {
optional => 1,
type => 'integer',
- description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
+ description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
." you use the balloon device.",
minimum => 16,
default => 512,
balloon => {
optional => 1,
type => 'integer',
- description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
+ description => "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
minimum => 0,
},
shares => {
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
-l26;; Linux 2.6 - 5.X Kernel
+l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiania kernel
EODESC
},
},
agent => {
optional => 1,
- description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
+ description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
type => 'string',
format => $agent_fmt,
},
verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:
-args: -no-reboot -no-hpet
+args: -no-reboot -smbios 'type=0,vendor=FOO'
NOTE: this option is for experts only.
EODESCR
network => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all network data passed to the VM via'
- .' cloud-init.',
+ description => 'To pass a custom file containing all network data to the VM via cloud-init.',
format => 'pve-volume-id',
format_description => 'volume',
},
user => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all user data passed to the VM via'
- .' cloud-init.',
+ description => 'To pass a custom file containing all user data to the VM via cloud-init.',
format => 'pve-volume-id',
format_description => 'volume',
},
vendor => {
- type => 'string',
- optional => 1,
- description => 'Specify a custom file containing all vendor data passed to the VM via'
- .' cloud-init.',
- format => 'pve-volume-id',
- format_description => 'volume',
+ type => 'string',
+ optional => 1,
+ description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
+ format => 'pve-volume-id',
+ format_description => 'volume',
},
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
.' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
.' support hashed passwords.',
},
+ ciupgrade => {
+ optional => 1,
+ type => 'boolean',
+ description => 'cloud-init: do an automatic package upgrade after the first boot.'
+ },
cicustom => {
optional => 1,
type => 'string',
PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
}
-my $MAX_USB_DEVICES = 14;
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
return $volid;
}
-my $usb_fmt = {
- host => {
- default_key => 1,
- type => 'string', format => 'pve-qm-usb-device',
- format_description => 'HOSTUSBDEVICE|spice',
- description => <<EODESCR,
-The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:
-
- 'bus-port(.port)*' (decimal numbers) or
- 'vendor_id:product_id' (hexadeciaml numbers) or
- 'spice'
-
-You can use the 'lsusb -t' command to list existing usb devices.
-
-NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
-machines - use with special care.
-
-The value 'spice' can be used to add a usb redirection devices for spice.
-EODESCR
- },
- usb3 => {
- optional => 1,
- type => 'boolean',
- description => "Specifies whether if given host option is a USB3 device or port."
- ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
- ." is irrelevant (all devices are plugged into a xhci controller).",
- default => 0,
- },
-};
-
-my $usbdesc = {
- optional => 1,
- type => 'string', format => $usb_fmt,
- description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
- ." l26 or windows > 7, n can be up to 14).",
-};
-PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
-
my $serialdesc = {
optional => 1,
type => 'string',
$confdesc->{$key} = $PVE::QemuServer::Drive::drivedesc_hash->{$key};
}
-for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
- $confdesc->{"usb$i"} = $usbdesc;
+for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
+ $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc;
}
my $boot_fmt = {
my $cdrom_path;
sub get_cdrom_path {
- return $cdrom_path if $cdrom_path;
+ return $cdrom_path if defined($cdrom_path);
+
+ $cdrom_path = first { -l $_ } map { "/dev/cdrom$_" } ('', '1', '2');
+
+ if (!defined($cdrom_path)) {
+ log_warn("no physical CD-ROM available, ignoring");
+ $cdrom_path = '';
+ }
- return $cdrom_path = "/dev/cdrom" if -l "/dev/cdrom";
- return $cdrom_path = "/dev/cdrom1" if -l "/dev/cdrom1";
- return $cdrom_path = "/dev/cdrom2" if -l "/dev/cdrom2";
+ return $cdrom_path;
}
sub get_iso_path {
return $initiator;
}
+my sub storage_allows_io_uring_default {
+ my ($scfg, $cache_direct) = @_;
+
+ # io_uring with cache mode writeback or writethrough on krbd will hang...
+ return if $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;
+
+ # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
+ # sometimes, just plain disable...
+ return if $scfg && $scfg->{type} eq 'lvm';
+
+ # io_uring causes problems when used with CIFS since kernel 5.15
+ # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
+ return if $scfg && $scfg->{type} eq 'cifs';
+
+ return 1;
+}
+
+my sub drive_uses_cache_direct {
+ my ($drive, $scfg) = @_;
+
+ my $cache_direct = 0;
+
+ if (my $cache = $drive->{cache}) {
+ $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
+ } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
+ $cache_direct = 1;
+ }
+
+ return $cache_direct;
+}
+
sub print_drive_commandline_full {
my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;
$opts .= ",format=$format";
}
- my $cache_direct = 0;
-
- if (my $cache = $drive->{cache}) {
- $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
- } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
- $opts .= ",cache=none";
- $cache_direct = 1;
- }
-
- # io_uring with cache mode writeback or writethrough on krbd will hang...
- my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;
-
- # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
- # sometimes, just plain disable...
- my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';
+ my $cache_direct = drive_uses_cache_direct($drive, $scfg);
- # io_uring causes problems when used with CIFS since kernel 5.15
- # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
- my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';
+ $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;
if (!$drive->{aio}) {
- if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
+ if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
# io_uring supports all cache modes
$opts .= ",aio=io_uring";
} else {
return;
}
-PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
-sub verify_usb_device {
- my ($value, $noerr) = @_;
-
- return $value if parse_usb_device($value);
-
- return if $noerr;
-
- die "unable to parse usb device\n";
-}
-
# add JSON properties for create and set function
sub json_config_properties {
my ($prop, $with_disk_alloc) = @_;
my $conf = PVE::QemuConfig->load_config($vmid);
- PVE::QemuConfig->check_lock($conf) if !$skiplock;
+ if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
+ PVE::QemuConfig->check_lock($conf);
+ }
if ($conf->{template}) {
# check if any base image is still used by a linked clone
sub check_running {
my ($vmid, $nocheck, $node) = @_;
+ # $nocheck is set when called during a migration, in which case the config
+ # file might still or already reside on the *other* node
+ # - because rename has already happened, and current node is source
+ # - because rename hasn't happened yet, and current node is target
+ # - because rename has happened, current node is target, but hasn't yet
+ # processed it yet
PVE::QemuConfig::assert_config_exists_on_node($vmid, $node) if !$nocheck;
return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
our $vmstatus_return_properties = {
vmid => get_standard_option('pve-vmid'),
status => {
- description => "Qemu process status.",
+ description => "QEMU process status.",
type => 'string',
enum => ['stopped', 'running'],
},
optional => 1,
},
qmpstatus => {
- description => "Qemu QMP agent status.",
+ description => "VM run state from the 'query-status' QMP monitor command.",
type => 'string',
optional => 1,
},
});
}
+ # Used to distinguish different invocations in the log.
+ my $log_prefix = "[id=" . int(time()) . "] ";
+
my $emulator_cmd = [
"swtpm",
"socket",
"file=$paths->{pid}",
"--terminate", # terminate on QEMU disconnect
"--daemon",
+ "--log",
+ "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
];
push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
run_command($emulator_cmd, outfunc => sub { print $1; });
or die "no OVMF images known for architecture '$arch'\n";
my $type = 'default';
- if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
+ if ($arch ne "aarch64" && defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
$type = $smm ? "4m" : "4m-no-smm";
$type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
}
- return $types->{$type}->@*;
+ my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*;
+ die "EFI base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
+ die "EFI vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
+
+ return ($ovmf_code, $ovmf_vars);
}
my $Arch2Qemu = {
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
my sub should_disable_smm {
- my ($conf, $vga) = @_;
+ my ($conf, $vga, $machine) = @_;
+
+ return if $machine =~ m/^virt/; # there is no smm flag that could be disabled
return (!defined($conf->{bios}) || $conf->{bios} eq 'seabios') &&
$vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
+my sub print_ovmf_drive_commandlines {
+ my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;
+
+ my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
+
+ my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
+
+ my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";
+ if ($d) {
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
+ my ($path, $format) = $d->@{'file', 'format'};
+ if ($storeid) {
+ $path = PVE::Storage::path($storecfg, $d->{file});
+ if (!defined($format)) {
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ $format = qemu_img_format($scfg, $volname);
+ }
+ } elsif (!defined($format)) {
+ die "efidisk format must be specified\n";
+ }
+ # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
+ if ($path =~ m/^rbd:/) {
+ $var_drive_str .= ',cache=writeback';
+ $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
+ }
+ $var_drive_str .= ",format=$format,file=$path";
+
+ $var_drive_str .= ",size=" . (-s $ovmf_vars) if $format eq 'raw' && $version_guard->(4, 1, 2);
+ $var_drive_str .= ',readonly=on' if drive_is_read_only($conf, $d);
+ } else {
+ log_warn("no efidisk configured! Using temporary efivars disk.");
+ my $path = "/tmp/$vmid-ovmf.fd";
+ PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
+ $var_drive_str .= ",format=raw,file=$path";
+ $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
+ }
+
+ return ("if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code", $var_drive_str);
+}
+
sub config_to_command {
my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
$pbs_backing) = @_;
}
if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
- my $d;
- if (my $efidisk = $conf->{efidisk0}) {
- $d = parse_drive('efidisk0', $efidisk);
- }
-
- my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
- die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
-
- my ($path, $format);
- my $read_only_str = '';
- if ($d) {
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
- $format = $d->{format};
- if ($storeid) {
- $path = PVE::Storage::path($storecfg, $d->{file});
- if (!defined($format)) {
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
- }
- } else {
- $path = $d->{file};
- die "efidisk format must be specified\n"
- if !defined($format);
- }
-
- $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
- } else {
- log_warn("no efidisk configured! Using temporary efivars disk.");
- $path = "/tmp/$vmid-ovmf.fd";
- PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
- $format = 'raw';
- }
-
- my $size_str = "";
-
- if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
- $size_str = ",size=" . (-s $ovmf_vars);
- }
-
- # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
- my $cache = "";
- if ($path =~ m/^rbd:/) {
- $cache = ',cache=writeback';
- $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
- }
-
- push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
- push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
+ my ($code_drive_str, $var_drive_str) =
+ print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
+ push $cmd->@*, '-drive', $code_drive_str;
+ push $cmd->@*, '-drive', $var_drive_str;
}
if ($q35) { # tell QEMU to load q35 config early
# add usb controllers
my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
- $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version);
+ $conf, $bridges, $arch, $machine_type, $machine_version);
push @$devices, @usbcontrollers if @usbcontrollers;
my $vga = parse_vga($conf->{vga});
$usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
- $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version);
+ $conf, $usb_dev_features, $bootorder, $machine_version);
push @$devices, @usbdevices if @usbdevices;
# serial devices
if ($path eq 'socket') {
my $socket = "/var/run/qemu-server/${vmid}.serial$i";
push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
- # On aarch64, serial0 is the UART device. Qemu only allows
+ # On aarch64, serial0 is the UART device. QEMU only allows
# connecting UART devices via the '-serial' command line, as
# the device has a fixed slot on the hardware...
if ($arch eq 'aarch64' && $i == 0) {
}
} else {
die "no such serial device\n" if ! -c $path;
- push @$devices, '-chardev', "tty,id=serial$i,path=$path";
+ push @$devices, '-chardev', "serial,id=serial$i,path=$path";
push @$devices, '-device', "isa-serial,chardev=serial$i";
}
}
for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
if (my $path = $conf->{"parallel$i"}) {
die "no such parallel device\n" if ! -c $path;
- my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
+ my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
push @$devices, '-device', "isa-parallel,chardev=parallel$i";
}
push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
- push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
+ push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
if ($winversion >= 6) {
push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
- push @$cmd, '-no-hpet';
+ push @$machineFlags, 'hpet=off';
}
push @$rtcFlags, 'driftfix=slew' if $tdf;
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config(
+ $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
push @$cmd, '-S' if $conf->{freeze};
push @$machineFlags, 'accel=tcg';
}
- push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
+ push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
my $machine_type_min = $machine_type;
if ($add_pve_version) {
my $to_check = [];
for my $d (@$devices_to_check) {
$devices->{$d->{'qdev_id'}} = 1 if $d->{'qdev_id'};
- next if !$d->{'pci_bridge'};
+ next if !$d->{'pci_bridge'} || !$d->{'pci_bridge'}->{devices};
$devices->{$d->{'qdev_id'}} += scalar(@{$d->{'pci_bridge'}->{devices}});
push @$to_check, @{$d->{'pci_bridge'}->{devices}};
qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
}
- # print_usbdevice_full expects the parsed device
- my $d = parse_usb_device($device->{host});
- $d->{usb3} = $device->{usb3};
-
# add the new one
- vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $d, $arch, $machine_type);
+ vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
sub qemu_cpu_hotplug {
my $running = check_running($vmid);
- $size = 0 if !PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
+ PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
return if !$running;
}
my $fast_plug_option = {
+ 'description' => 1,
+ 'hookscript' => 1,
'lock' => 1,
+ 'migrate_downtime' => 1,
+ 'migrate_speed' => 1,
'name' => 1,
'onboot' => 1,
+ 'protection' => 1,
'shares' => 1,
'startup' => 1,
- 'description' => 1,
- 'protection' => 1,
- 'vmstatestorage' => 1,
- 'hookscript' => 1,
'tags' => 1,
+ 'vmstatestorage' => 1,
};
for my $opt (keys %$confdesc_cloudinit) {
vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
} elsif ($opt =~ m/^memory$/) {
die "skip\n" if !$hotplug_features->{memory};
- PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
+ PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
} elsif ($opt eq 'cpuunits') {
$cgroup->change_cpu_shares(undef);
} elsif ($opt eq 'cpulimit') {
} elsif ($opt =~ m/^usb(\d+)$/) {
my $index = $1;
die "skip\n" if !$usb_hotplug;
- my $d = eval { parse_property_string($usbdesc->{format}, $value) };
+ my $d = eval { parse_property_string('pve-qm-usb', $value) };
my $id = $opt;
- if ($d->{host} eq 'spice') {
+ if ($d->{host} =~ m/^spice$/i) {
$id = "usbredirdev$index";
}
qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
$vmid, $opt, $value, $arch, $machine_type);
} elsif ($opt =~ m/^memory$/) { #dimms
die "skip\n" if !$hotplug_features->{memory};
- $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
+ $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
} elsif ($opt eq 'cpuunits') {
my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
$cgroup->change_cpu_shares($new_cpuunits);
# unplug xhci controller if no usb device is left
if ($usb_hotplug) {
my $has_usb = 0;
- for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
+ for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
next if !defined($conf->{"usb$i"});
$has_usb = 1;
last;
if (safe_string_ne($oldnet->{model}, $newnet->{model}) ||
safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) ||
safe_num_ne($oldnet->{queues}, $newnet->{queues}) ||
+ safe_num_ne($oldnet->{mtu}, $newnet->{mtu}) ||
!($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change
# for non online change, we try to hot-unplug
# update existing disk
# skip non hotpluggable value
- if (safe_string_ne($drive->{discard}, $old_drive->{discard}) ||
+ if (safe_string_ne($drive->{aio}, $old_drive->{aio}) ||
+ safe_string_ne($drive->{discard}, $old_drive->{discard}) ||
safe_string_ne($drive->{iothread}, $old_drive->{iothread}) ||
safe_string_ne($drive->{queues}, $old_drive->{queues}) ||
safe_string_ne($drive->{cache}, $old_drive->{cache}) ||
# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
# skiplock => 0/1, skip checking for config lock
# skiptemplate => 0/1, skip checking whether VM is template
-# forcemachine => to force Qemu machine (rollback/migration)
+# forcemachine => to force QEMU machine (rollback/migration)
# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
# timeout => in seconds
# paused => start VM in paused state (backup)
my $defaults = load_defaults();
# set environment variable useful inside network script
- $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;
+ # for remote migration the config is available on the target node!
+ if (!$migrate_opts->{remote_node}) {
+ $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
+ }
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
return $migration_ip;
};
- my $migrate_uri;
if ($statefile) {
if ($statefile eq 'tcp') {
- my $localip = "localhost";
+ my $migrate = $res->{migrate} = { proto => 'tcp' };
+ $migrate->{addr} = "localhost";
my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
my $nodename = nodename();
}
if ($migration_type eq 'insecure') {
- $localip = $get_migration_ip->($nodename);
- $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
+ $migrate->{addr} = $get_migration_ip->($nodename);
+ $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
}
my $pfamily = PVE::Tools::get_host_address_family($nodename);
- my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
- $migrate_uri = "tcp:${localip}:${migrate_port}";
- push @$cmd, '-incoming', $migrate_uri;
+ $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
+ $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
+ push @$cmd, '-incoming', $migrate->{uri};
push @$cmd, '-S';
} elsif ($statefile eq 'unix') {
# should be default for secure migrations as a ssh TCP forward
# tunnel is not deterministic reliable ready and fails regurarly
# to set up in time, so use UNIX socket forwards
- my $socket_addr = "/run/qemu-server/$vmid.migrate";
- unlink $socket_addr;
-
- $migrate_uri = "unix:$socket_addr";
+ my $migrate = $res->{migrate} = { proto => 'unix' };
+ $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
+ unlink $migrate->{addr};
- push @$cmd, '-incoming', $migrate_uri;
+ $migrate->{uri} = "unix:$migrate->{addr}";
+ push @$cmd, '-incoming', $migrate->{uri};
push @$cmd, '-S';
} elsif (-e $statefile) {
for my $dev ($d->{pciid}->@*) {
my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
- # nvidia grid needs the uuid of the mdev as qemu parameter
+ # nvidia grid needs the qemu parameter '-uuid' set
+ # use smbios uuid or mdev uuid as fallback for that
if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
- $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
+ if (defined($conf->{smbios1})) {
+ my $smbios_conf = parse_smbios1($conf->{smbios1});
+ $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
+ }
+ $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id) if !defined($uuid);
}
}
}
if ($conf->{hugepages}) {
my $code = sub {
- my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
+ my $hotplug_features =
+ parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+ my $hugepages_topology =
+ PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
+
my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
PVE::QemuServer::Memory::hugepages_mount();
eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
warn $@ if $@;
- print "migration listens on $migrate_uri\n" if $migrate_uri;
- $res->{migrate_uri} = $migrate_uri;
-
- if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
+ if (defined($res->{migrate})) {
+ print "migration listens on $res->{migrate}->{uri}\n";
+ } elsif ($statefile) {
eval { mon_cmd($vmid, "cont"); };
warn $@ if $@;
}
my $migrate_storage_uri;
# nbd_protocol_version > 0 for unix socket support
- if ($nbd_protocol_version > 0 && $migration_type eq 'secure') {
+ if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
$migrate_storage_uri = "nbd:unix:$socket_path";
+ $res->{migrate}->{unix_sockets} = [$socket_path];
} else {
my $nodename = nodename();
my $localip = $get_migration_ip->($nodename);
$migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
}
- $res->{migrate_storage_uri} = $migrate_storage_uri;
+ my $block_info = mon_cmd($vmid, "query-block");
+ $block_info = { map { $_->{device} => $_ } $block_info->@* };
foreach my $opt (sort keys %$nbd) {
my $drivestr = $nbd->{$opt}->{drivestr};
my $volid = $nbd->{$opt}->{volid};
- mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
+
+ my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
+
+ mon_cmd(
+ $vmid,
+ "block-export-add",
+ id => "drive-$opt",
+ 'node-name' => $block_node,
+ writable => JSON::true,
+ type => "nbd",
+ name => "drive-$opt", # NBD export name
+ );
+
my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
print "storage migration listens on $nbd_uri volume:$drivestr\n";
print "re-using replicated volume: $opt - $volid\n"
add_nets_bridge_fdb($conf, $vmid);
}
- mon_cmd($vmid, 'qom-set',
+ if (!defined($conf->{balloon}) || $conf->{balloon}) {
+ eval {
+ mon_cmd(
+ $vmid,
+ 'qom-set',
path => "machine/peripheral/balloon0",
property => "guest-stats-polling-interval",
- value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});
+ value => 2
+ );
+ };
+ log_warn("could not set polling interval for ballooning - $@") if $@;
+ }
if ($resume) {
print "Resumed VM, removing state\n";
# NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
# don't want to break ABI just for this two liner
my $dev_sysfs_dir = "/sys/bus/mdev/devices/$uuid";
+
+ # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
+ # out when we do it first. so wait for 10 seconds and then try it
+ my $pciid = $d->{pciid}->[0]->{id};
+ my $info = PVE::SysFSTools::pci_device_info("$pciid");
+ if ($info->{vendor} eq '10de') {
+ sleep 10;
+ }
+
PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1") if -e $dev_sysfs_dir;
}
}
}
}
+# $nocheck is set when called as part of a migration - in this context the
+# location of the config file (source or target node) is not deterministic,
+# since migration cannot wait for pmxcfs to process the rename
sub vm_resume {
my ($vmid, $skiplock, $nocheck) = @_;
my $res = mon_cmd($vmid, 'query-status');
my $resume_cmd = 'cont';
my $reset = 0;
- my $conf = PVE::QemuConfig->load_config($vmid);
+ my $conf;
+ if ($nocheck) {
+ $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
+ if ($@) {
+ my $vmlist = PVE::Cluster::get_vmlist();
+ if (exists($vmlist->{ids}->{$vmid})) {
+ my $node = $vmlist->{ids}->{$vmid}->{node};
+ $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
+ }
+ if (!$conf) {
+ PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
+ $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
+ }
+ }
+ } else {
+ $conf = PVE::QemuConfig->load_config($vmid);
+ }
if ($res->{status}) {
return if $res->{status} eq 'running'; # job done, go home
}
if (!$nocheck) {
-
PVE::QemuConfig->check_lock($conf)
if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup'));
}
});
}
+sub check_bridge_access {
+ my ($rpcenv, $authuser, $conf) = @_;
+
+ return 1 if $authuser eq 'root@pam';
+
+ for my $opt (sort keys $conf->%*) {
+ next if $opt !~ m/^net\d+$/;
+ my $net = parse_net($conf->{$opt});
+ my ($bridge, $tag, $trunks) = $net->@{'bridge', 'tag', 'trunks'};
+ PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
+ }
+ return 1;
+};
+
+sub check_mapping_access {
+ my ($rpcenv, $user, $conf) = @_;
+
+ for my $opt (keys $conf->%*) {
+ if ($opt =~ m/^usb\d+$/) {
+ my $device = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
+ if (my $host = $device->{host}) {
+ die "only root can set '$opt' config for real devices\n"
+ if $host !~ m/^spice$/i && $user ne 'root@pam';
+ } elsif ($device->{mapping}) {
+ $rpcenv->check_full($user, "/mapping/usb/$device->{mapping}", ['Mapping.Use']);
+ } else {
+ die "either 'host' or 'mapping' must be set.\n";
+ }
+ }
+ }
+};
+
+# FIXME: improve checks on restore by checking before actually extracing and
+# merging the new config
+sub check_restore_permissions {
+ my ($rpcenv, $user, $conf) = @_;
+ check_bridge_access($rpcenv, $user, $conf);
+ check_mapping_access($rpcenv, $user, $conf);
+}
# vzdump restore implementaion
sub tar_archive_read_firstfile {
}
my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
+ check_restore_permissions($rpcenv, $user, $new_conf);
PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
}
my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
+ check_restore_permissions($rpcenv, $user, $new_conf);
PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
if ($@) {
- warn "Qemu Guest Agent is not running - $@" if !$nowarn;
+ warn "QEMU Guest Agent is not running - $@" if !$nowarn;
return 0;
}
return 1;
}
sub qemu_img_convert {
- my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;
+ my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;
my $storecfg = PVE::Storage::config();
my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
if $snapname && $src_format && $src_format eq "qcow2";
push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
push @$cmd, '-T', $cachemode if defined($cachemode);
+ push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);
if ($src_is_iscsi) {
push @$cmd, '--image-opts';
}
}
+# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
+# source, but some storages have problems with io_uring, sometimes even leading to crashes.
+my sub clone_disk_check_io_uring {
+ my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;
+
+ return if !$use_drive_mirror;
+
+ # Don't complain when not changing storage.
+ # Assume if it works for the source, it'll work for the target too.
+ return if $src_storeid eq $dst_storeid;
+
+ my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
+ my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
+
+ my $cache_direct = drive_uses_cache_direct($src_drive);
+
+ my $src_uses_io_uring;
+ if ($src_drive->{aio}) {
+ $src_uses_io_uring = $src_drive->{aio} eq 'io_uring';
+ } else {
+ $src_uses_io_uring = storage_allows_io_uring_default($src_scfg, $cache_direct);
+ }
+
+ die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
+ if $src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct);
+}
+
sub clone_disk {
my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;
$newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
push @$newvollist, $newvolid;
} else {
-
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
- $storeid = $storage if $storage;
+ my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
+ my $storeid = $storage || $src_storeid;
my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);
$dst_format = 'raw';
$size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
} else {
- ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
+ clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);
+
+ $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
}
$newvolid = PVE::Storage::vdisk_alloc(
$storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
$completion, $qga, $bwlimit);
} else {
- # TODO: handle bwlimits
if ($dst_drivename eq 'efidisk0') {
# the relevant data on the efidisk may be smaller than the source
# e.g. on RBD/ZFS, so we use dd to copy only the amount
push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
run_command($cmd);
} else {
- qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
+ qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
}
}
}
no_data_clone:
- my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };
+ my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };
my $disk = dclone($drive);
delete $disk->{format};
$efidisk //= $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
- die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
return -s $ovmf_vars;
}
my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;
my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
- die "EFI vars default image not found\n" if ! -f $ovmf_vars;
my $vars_size_b = -s $ovmf_vars;
my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');
PVE::Storage::activate_volumes($storecfg, [$volid]);
qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);
- my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);
+ my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);
return ($volid, $size/1024);
}
next;
}
+ my $bridge = $net->{bridge};
+ if (!$bridge) {
+ log_warn("Interface '$iface' not attached to any bridge.");
+ next;
+ }
if ($have_sdn) {
- PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $net->{bridge}, $net->{firewall});
- } else {
+ PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
+ } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
}
}
my $net = parse_net($conf->{$opt}) or next;
my $mac = $net->{macaddr} or next;
+ my $bridge = $net->{bridge};
if ($have_sdn) {
- PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $net->{bridge}, $net->{firewall});
- } else {
+ PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
+ } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
}
}