use IO::Socket::UNIX;
use IPC::Open3;
use JSON;
+use List::Util qw(first);
use MIME::Base64;
use POSIX;
use Storable qw(dclone);
use PVE::Exception qw(raise raise_param_exc);
use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
+use PVE::Mapping::PCI;
+use PVE::Mapping::USB;
use PVE::INotify;
use PVE::JSONSchema qw(get_standard_option parse_property_string);
use PVE::ProcFSTools;
use PVE::QemuServer::Memory;
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
-use PVE::QemuServer::USB qw(parse_usb_device);
+use PVE::QemuServer::USB;
my $have_sdn;
eval {
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
-cfs_register_file('/qemu-server/',
- \&parse_vm_config,
- \&write_vm_config);
+cfs_register_file(
+ '/qemu-server/',
+ \&parse_vm_config,
+ \&write_vm_config
+);
PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
description => "Some command save/restore state from this location.",
});
PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
- description => "Specifies the Qemu machine type.",
+ description => "Specifies the QEMU machine type.",
type => 'string',
pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
maxLength => 40,
optional => 1,
});
-#no warnings 'redefine';
-
+# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;
sub nodename {
$nodename_cache //= PVE::INotify::nodename();
my $agent_fmt = {
enabled => {
- description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
+ description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
type => 'boolean',
default => 0,
default_key => 1,
description => "Run fstrim after moving a disk or migrating the VM.",
type => 'boolean',
optional => 1,
- default => 0
+ default => 0,
+ },
+ 'freeze-fs-on-backup' => {
+ description => "Freeze/thaw guest filesystems on backup for consistency.",
+ type => 'boolean',
+ optional => 1,
+ default => 1,
},
type => {
description => "Select the agent type",
memory => {
optional => 1,
type => 'integer',
- description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
+ description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
." you use the balloon device.",
minimum => 16,
default => 512,
balloon => {
optional => 1,
type => 'integer',
- description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
+ description => "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
minimum => 0,
},
shares => {
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
-l26;; Linux 2.6 - 5.X Kernel
+l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiania kernel
EODESC
},
},
agent => {
optional => 1,
- description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
+ description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
type => 'string',
format => $agent_fmt,
},
verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:
-args: -no-reboot -no-hpet
+args: -no-reboot -smbios 'type=0,vendor=FOO'
NOTE: this option is for experts only.
EODESCR
},
affinity => {
type => 'string', format => 'pve-cpuset',
- description => "List of host cores used to execute guest processes.",
+ description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
optional => 1,
},
};
network => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all network data passed to the VM via'
- .' cloud-init.',
+ description => 'To pass a custom file containing all network data to the VM via cloud-init.',
format => 'pve-volume-id',
format_description => 'volume',
},
user => {
type => 'string',
optional => 1,
- description => 'Specify a custom file containing all user data passed to the VM via'
- .' cloud-init.',
+ description => 'To pass a custom file containing all user data to the VM via cloud-init.',
format => 'pve-volume-id',
format_description => 'volume',
},
vendor => {
- type => 'string',
- optional => 1,
- description => 'Specify a custom file containing all vendor data passed to the VM via'
- .' cloud-init.',
- format => 'pve-volume-id',
- format_description => 'volume',
+ type => 'string',
+ optional => 1,
+ description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
+ format => 'pve-volume-id',
+ format_description => 'volume',
},
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
.' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
.' support hashed passwords.',
},
+ ciupgrade => {
+ optional => 1,
+ type => 'boolean',
+ description => 'cloud-init: do an automatic package upgrade after the first boot.',
+ default => 1,
+ },
cicustom => {
optional => 1,
type => 'string',
PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
}
-my $MAX_USB_DEVICES = 14;
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
}),
queues => {
type => 'integer',
- minimum => 0, maximum => 16,
+ minimum => 0, maximum => 64,
description => 'Number of packet queues to be used on the device.',
optional => 1,
},
return $volid;
}
-my $usb_fmt = {
- host => {
- default_key => 1,
- type => 'string', format => 'pve-qm-usb-device',
- format_description => 'HOSTUSBDEVICE|spice',
- description => <<EODESCR,
-The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:
-
- 'bus-port(.port)*' (decimal numbers) or
- 'vendor_id:product_id' (hexadeciaml numbers) or
- 'spice'
-
-You can use the 'lsusb -t' command to list existing usb devices.
-
-NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
-machines - use with special care.
-
-The value 'spice' can be used to add a usb redirection devices for spice.
-EODESCR
- },
- usb3 => {
- optional => 1,
- type => 'boolean',
- description => "Specifies whether if given host option is a USB3 device or port."
- ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
- ." is irrelevant (all devices are plugged into a xhci controller).",
- default => 0,
- },
-};
-
-my $usbdesc = {
- optional => 1,
- type => 'string', format => $usb_fmt,
- description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
- ." l26 or windows > 7, n can be up to 14).",
-};
-PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
-
my $serialdesc = {
optional => 1,
type => 'string',
$confdesc->{$key} = $PVE::QemuServer::Drive::drivedesc_hash->{$key};
}
-for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
- $confdesc->{"usb$i"} = $usbdesc;
+for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
+ $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc;
}
my $boot_fmt = {
my $cdrom_path;
sub get_cdrom_path {
- return $cdrom_path if $cdrom_path;
+ return $cdrom_path if defined($cdrom_path); # cached result of an earlier call; '' means "none found"
- return $cdrom_path = "/dev/cdrom" if -l "/dev/cdrom";
- return $cdrom_path = "/dev/cdrom1" if -l "/dev/cdrom1";
- return $cdrom_path = "/dev/cdrom2" if -l "/dev/cdrom2";
+ $cdrom_path = first { -l $_ } map { "/dev/cdrom$_" } ('', '1', '2'); # first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink
+
+ if (!defined($cdrom_path)) {
+ log_warn("no physical CD-ROM available, ignoring");
+ $cdrom_path = ''; # remember the miss so later calls return early without probing or warning again
+ }
+
+ return $cdrom_path;
}
sub get_iso_path {
my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
my $controller = int($drive->{index} / $maxdev);
my $unit = $drive->{index} % $maxdev;
+
+ # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
+ # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
+ # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
+ # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
+ # were before. Move odd ones up by 2 where they don't clash.
+ if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') {
+ $controller += 2 * ($unit % 2);
+ $unit = 0;
+ }
+
my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";
$device = "ide-$devicetype";
return $initiator;
}
+my sub storage_allows_io_uring_default { # returns 1 unless $scfg matches one of the storage setups excluded below
+ my ($scfg, $cache_direct) = @_;
+
+ # io_uring with cache mode writeback or writethrough on krbd will hang...
+ return if $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;
+
+ # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
+ # sometimes, just plain disable...
+ return if $scfg && $scfg->{type} eq 'lvm';
+
+ # io_uring causes problems when used with CIFS since kernel 5.15
+ # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
+ return if $scfg && $scfg->{type} eq 'cifs';
+
+ return 1; # no exclusion matched; also reached when $scfg is false, since every check above requires it
+}
+
+my sub drive_uses_cache_direct { # returns a true value when the drive's cache mode counts as direct
+ my ($drive, $scfg) = @_;
+
+ my $cache_direct = 0;
+
+ if (my $cache = $drive->{cache}) { # a cache mode is set on the drive: direct only for off, none and directsync
+ $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
+ } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) { # no cache mode set: direct unless the drive is a CD-ROM or sits on a btrfs storage without 'nocow'
+ $cache_direct = 1;
+ }
+
+ return $cache_direct;
+}
+
sub print_drive_commandline_full {
my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;
$opts .= ",format=$format";
}
- my $cache_direct = 0;
-
- if (my $cache = $drive->{cache}) {
- $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
- } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
- $opts .= ",cache=none";
- $cache_direct = 1;
- }
-
- # io_uring with cache mode writeback or writethrough on krbd will hang...
- my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;
-
- # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
- # sometimes, just plain disable...
- my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';
+ my $cache_direct = drive_uses_cache_direct($drive, $scfg);
- # io_uring causes problems when used with CIFS since kernel 5.15
- # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
- my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';
+ $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;
if (!$drive->{aio}) {
- if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
+ if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
# io_uring supports all cache modes
$opts .= ",aio=io_uring";
} else {
}
sub print_netdevice_full {
- my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;
+ my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;
my $device = $net->{model};
if ($net->{model} eq 'virtio') {
# and out of each queue plus one config interrupt and control vector queue
my $vectors = $net->{queues} * 2 + 2;
$tmpstr .= ",vectors=$vectors,mq=on";
+ if (min_version($machine_version, 7, 1)) {
+ $tmpstr .= ",packed=on";
+ }
+ }
+
+ if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio'){
+ $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
}
+
$tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ;
if (my $mtu = $net->{mtu}) {
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
sub parse_net {
- my ($data) = @_;
+ my ($data, $disable_mac_autogen) = @_;
my $res = eval { parse_property_string($net_fmt, $data) };
if ($@) {
warn $@;
return;
}
- if (!defined($res->{macaddr})) {
+ if (!defined($res->{macaddr}) && !$disable_mac_autogen) {
my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
$res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
}
return;
}
-PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
-sub verify_usb_device {
- my ($value, $noerr) = @_;
-
- return $value if parse_usb_device($value);
-
- return if $noerr;
-
- die "unable to parse usb device\n";
-}
-
# add JSON properties for create and set function
sub json_config_properties {
my ($prop, $with_disk_alloc) = @_;
return dclone($confdesc_cloudinit);
}
+# Returns a hash reference whose keys are the option names treated as cloud-init relevant:
+# every key of $confdesc_cloudinit, 'name', and net0 .. net($MAX_NETS-1). All values are 1.
+sub cloudinit_pending_properties {
+ my $p = {
+ # The parentheses are required: map is a list operator, so without them it would also
+ # consume the trailing "name => 1" pair and add a bogus '1' key to the result.
+ (map { $_ => 1 } keys $confdesc_cloudinit->%*),
+ name => 1,
+ };
+ $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
+ return $p;
+}
+
sub check_type {
my ($key, $value) = @_;
my $conf = PVE::QemuConfig->load_config($vmid);
- PVE::QemuConfig->check_lock($conf) if !$skiplock;
+ if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
+ PVE::QemuConfig->check_lock($conf);
+ }
if ($conf->{template}) {
# check if any base image is still used by a linked clone
my $conf = $res;
my $descr;
+ my $finish_description = sub {
+ if (defined($descr)) {
+ $descr =~ s/\s+$//;
+ $conf->{description} = $descr;
+ }
+ $descr = undef;
+ };
my $section = '';
my @lines = split(/\n/, $raw);
if ($line =~ m/^\[PENDING\]\s*$/i) {
$section = 'pending';
- if (defined($descr)) {
- $descr =~ s/\s+$//;
- $conf->{description} = $descr;
- }
- $descr = undef;
+ $finish_description->();
$conf = $res->{$section} = {};
next;
} elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
$section = 'cloudinit';
- $descr = undef;
+ $finish_description->();
$conf = $res->{$section} = {};
next;
} elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
$section = $1;
- if (defined($descr)) {
- $descr =~ s/\s+$//;
- $conf->{description} = $descr;
- }
- $descr = undef;
+ $finish_description->();
$conf = $res->{snapshots}->{$section} = {};
next;
}
} elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
my $key = $1;
my $value = $2;
+ if ($section eq 'cloudinit') {
+ # skip validation, this section is only used for informational purposes
+ $conf->{$key} = $value;
+ next;
+ }
eval { $value = check_type($key, $value); };
if ($@) {
$handle_error->("vm $vmid - unable to parse value of '$key' - $@");
}
}
- if (defined($descr)) {
- $descr =~ s/\s+$//;
- $conf->{description} = $descr;
- }
+ $finish_description->();
delete $res->{snapstate}; # just to be sure
return $res;
&$cleanup_config($conf->{pending}, 1);
- &$cleanup_config($conf->{cloudinit});
-
foreach my $snapname (keys %{$conf->{snapshots}}) {
die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
&$cleanup_config($conf->{snapshots}->{$snapname}, undef, $snapname);
$raw .= &$generate_raw_config($conf->{pending}, 1);
}
- if (scalar(keys %{$conf->{cloudinit}})){
+ if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
$raw .= "\n[special:cloudinit]\n";
$raw .= &$generate_raw_config($conf->{cloudinit});
}
my ($conf, $noerr) = @_;
my @loc_res = ();
+ my $mapped_res = [];
+
+ my $nodelist = PVE::Cluster::get_nodelist();
+ my $pci_map = PVE::Mapping::PCI::config();
+ my $usb_map = PVE::Mapping::USB::config();
+
+ my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };
+
+ my $add_missing_mapping = sub {
+ my ($type, $key, $id) = @_;
+ for my $node (@$nodelist) {
+ my $entry;
+ if ($type eq 'pci') {
+ $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
+ } elsif ($type eq 'usb') {
+ $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
+ }
+ if (!scalar($entry->@*)) {
+ push @{$missing_mappings_by_node->{$node}}, $key;
+ }
+ }
+ };
push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax
push @loc_res, "ivshmem" if $conf->{ivshmem};
foreach my $k (keys %$conf) {
- next if $k =~ m/^usb/ && ($conf->{$k} =~ m/^spice(?![^,])/);
+ if ($k =~ m/^usb/) {
+ my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
+ # 'host' can be unset (presumably when the entry only carries 'mapping', which is
+ # checked right below), so guard it to avoid an "uninitialized value" warning
+ next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
+ if ($entry->{mapping}) {
+ $add_missing_mapping->('usb', $k, $entry->{mapping});
+ push @$mapped_res, $k;
+ }
+ }
+ if ($k =~ m/^hostpci/) {
+ my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
+ if ($entry->{mapping}) {
+ $add_missing_mapping->('pci', $k, $entry->{mapping});
+ push @$mapped_res, $k;
+ }
+ }
# sockets are safe: they will recreated be on the target side post-migrate
next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
die "VM uses local resources\n" if scalar @loc_res && !$noerr;
- return \@loc_res;
+ return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
# check if used storages are available on all nodes (use by migrate)
sub check_running {
my ($vmid, $nocheck, $node) = @_;
+ # $nocheck is set when called during a migration, in which case the config
+ # file might still or already reside on the *other* node
+ # - because rename has already happened, and current node is source
+ # - because rename hasn't happened yet, and current node is target
+ # - because rename has happened and current node is target, but it has
+ # not processed the rename yet
PVE::QemuConfig::assert_config_exists_on_node($vmid, $node) if !$nocheck;
return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
our $vmstatus_return_properties = {
vmid => get_standard_option('pve-vmid'),
status => {
- description => "Qemu process status.",
+ description => "QEMU process status.",
type => 'string',
enum => ['stopped', 'running'],
},
optional => 1,
},
qmpstatus => {
- description => "Qemu QMP agent status.",
+ description => "VM run state from the 'query-status' QMP monitor command.",
type => 'string',
optional => 1,
},
});
}
+ # Used to distinguish different invocations in the log.
+ my $log_prefix = "[id=" . int(time()) . "] ";
+
my $emulator_cmd = [
"swtpm",
"socket",
"file=$paths->{pid}",
"--terminate", # terminate on QEMU disconnect
"--daemon",
+ "--log",
+ "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
];
push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
run_command($emulator_cmd, outfunc => sub { print $1; });
or die "no OVMF images known for architecture '$arch'\n";
my $type = 'default';
- if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
+ if ($arch ne "aarch64" && defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
$type = $smm ? "4m" : "4m-no-smm";
$type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
}
- return $types->{$type}->@*;
+ my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*;
+ die "EFI base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
+ die "EFI vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
+
+ return ($ovmf_code, $ovmf_vars);
}
my $Arch2Qemu = {
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
my sub should_disable_smm {
- my ($conf, $vga) = @_;
+ my ($conf, $vga, $machine) = @_; # $machine: machine type string, only tested against /^virt/ below
+
+ return if $machine =~ m/^virt/; # there is no smm flag that could be disabled
return (!defined($conf->{bios}) || $conf->{bios} eq 'seabios') &&
$vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
+my sub print_ovmf_drive_commandlines { # builds the two pflash drive option strings used for OVMF: (code image, vars image)
+ my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;
+
+ my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef; # undef when no efidisk0 is configured
+
+ my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
+
+ my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";
+ if ($d) {
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
+ my ($path, $format) = $d->@{'file', 'format'}; # start from the configured values; $path is replaced below when a storage ID was parsed
+ if ($storeid) {
+ $path = PVE::Storage::path($storecfg, $d->{file});
+ if (!defined($format)) {
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ $format = qemu_img_format($scfg, $volname);
+ }
+ } elsif (!defined($format)) { # no storage ID parsed from the file, so the format cannot be looked up
+ die "efidisk format must be specified\n";
+ }
+ # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
+ if ($path =~ m/^rbd:/) {
+ $var_drive_str .= ',cache=writeback';
+ $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
+ }
+ $var_drive_str .= ",format=$format,file=$path";
+
+ $var_drive_str .= ",size=" . (-s $ovmf_vars) if $format eq 'raw' && $version_guard->(4, 1, 2); # size is the byte size of the vars template file
+ $var_drive_str .= ',readonly=on' if drive_is_read_only($conf, $d);
+ } else {
+ log_warn("no efidisk configured! Using temporary efivars disk.");
+ my $path = "/tmp/$vmid-ovmf.fd";
+ PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars); # copy the vars template to the temporary path
+ $var_drive_str .= ",format=raw,file=$path";
+ $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
+ }
+
+ return ("if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code", $var_drive_str); # (read-only code drive, vars drive)
+}
+
sub config_to_command {
my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
$pbs_backing) = @_;
- my $cmd = [];
my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
my $devices = [];
my $bridges = {};
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
+ my $cmd = [];
if ($conf->{affinity}) {
- push @$cmd, "/usr/bin/taskset";
- push @$cmd, "--cpu-list";
- push @$cmd, "--all-tasks";
- push @$cmd, $conf->{affinity};
+ push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
}
push @$cmd, $kvm_binary;
}
if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
- my $d;
- if (my $efidisk = $conf->{efidisk0}) {
- $d = parse_drive('efidisk0', $efidisk);
- }
-
- my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
- die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
-
- my ($path, $format);
- my $read_only_str = '';
- if ($d) {
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
- $format = $d->{format};
- if ($storeid) {
- $path = PVE::Storage::path($storecfg, $d->{file});
- if (!defined($format)) {
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
- }
- } else {
- $path = $d->{file};
- die "efidisk format must be specified\n"
- if !defined($format);
- }
-
- $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
- } else {
- log_warn("no efidisk configured! Using temporary efivars disk.");
- $path = "/tmp/$vmid-ovmf.fd";
- PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
- $format = 'raw';
- }
-
- my $size_str = "";
-
- if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
- $size_str = ",size=" . (-s $ovmf_vars);
- }
-
- # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
- my $cache = "";
- if ($path =~ m/^rbd:/) {
- $cache = ',cache=writeback';
- $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
- }
-
- push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
- push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
+ my ($code_drive_str, $var_drive_str) =
+ print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
+ push $cmd->@*, '-drive', $code_drive_str;
+ push $cmd->@*, '-drive', $var_drive_str;
}
if ($q35) { # tell QEMU to load q35 config early
# add usb controllers
my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
- $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version);
+ $conf, $bridges, $arch, $machine_type, $machine_version);
push @$devices, @usbcontrollers if @usbcontrollers;
my $vga = parse_vga($conf->{vga});
my $bootorder = device_bootorder($conf);
# host pci device passthrough
- my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
- $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
+ my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
+ $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
# usb devices
my $usb_dev_features = {};
$usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
- $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version);
+ $conf, $usb_dev_features, $bootorder, $machine_version);
push @$devices, @usbdevices if @usbdevices;
# serial devices
if ($path eq 'socket') {
my $socket = "/var/run/qemu-server/${vmid}.serial$i";
push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
- # On aarch64, serial0 is the UART device. Qemu only allows
+ # On aarch64, serial0 is the UART device. QEMU only allows
# connecting UART devices via the '-serial' command line, as
# the device has a fixed slot on the hardware...
if ($arch eq 'aarch64' && $i == 0) {
}
} else {
die "no such serial device\n" if ! -c $path;
- push @$devices, '-chardev', "tty,id=serial$i,path=$path";
+ push @$devices, '-chardev', "serial,id=serial$i,path=$path";
push @$devices, '-device', "isa-serial,chardev=serial$i";
}
}
for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
if (my $path = $conf->{"parallel$i"}) {
die "no such parallel device\n" if ! -c $path;
- my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
+ my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
push @$devices, '-device', "isa-parallel,chardev=parallel$i";
}
push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
- push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
+ push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
if ($winversion >= 6) {
push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
- push @$cmd, '-no-hpet';
+ push @$machineFlags, 'hpet=off';
}
push @$rtcFlags, 'driftfix=slew' if $tdf;
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
- PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
+ PVE::QemuServer::Memory::config(
+ $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
push @$cmd, '-S' if $conf->{freeze};
next if !$conf->{$netname};
my $d = parse_net($conf->{$netname});
next if !$d;
+ # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
$use_virtio = 1 if $d->{model} eq 'virtio';
push @$devices, '-netdev', $netdevfull;
my $netdevicefull = print_netdevice_full(
- $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
+ $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
push @$devices, '-device', $netdevicefull;
}
push @$machineFlags, 'accel=tcg';
}
- push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
+ push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
my $machine_type_min = $machine_type;
if ($add_pve_version) {
push @$cmd, @$aa;
}
- return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
+ return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
}
sub check_rng_source {
my $to_check = [];
for my $d (@$devices_to_check) {
$devices->{$d->{'qdev_id'}} = 1 if $d->{'qdev_id'};
- next if !$d->{'pci_bridge'};
+ next if !$d->{'pci_bridge'} || !$d->{'pci_bridge'}->{devices};
$devices->{$d->{'qdev_id'}} += scalar(@{$d->{'pci_bridge'}->{devices}});
push @$to_check, @{$d->{'pci_bridge'}->{devices}};
return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);
my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
+ my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type);
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
my $netdevicefull = print_netdevice_full(
- $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);
+ $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type, $machine_version);
qemu_deviceadd($vmid, $netdevicefull);
eval {
qemu_deviceaddverify($vmid, $deviceid);
qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
}
- # print_usbdevice_full expects the parsed device
- my $d = parse_usb_device($device->{host});
- $d->{usb3} = $device->{usb3};
-
# add the new one
- vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $d, $arch, $machine_type);
+ vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
sub qemu_cpu_hotplug {
my $running = check_running($vmid);
- $size = 0 if !PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
+ PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
return if !$running;
my $volhash = {};
my $test_volid = sub {
- my ($key, $drive, $snapname) = @_;
+ my ($key, $drive, $snapname, $pending) = @_;
my $volid = $drive->{file};
return if !$volid;
$volhash->{$volid}->{shared} //= 0;
$volhash->{$volid}->{shared} = 1 if $drive->{shared};
- $volhash->{$volid}->{referenced_in_config} //= 0;
- $volhash->{$volid}->{referenced_in_config} = 1 if !defined($snapname);
+ $volhash->{$volid}->{is_unused} //= 0;
+ $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
+
+ $volhash->{$volid}->{is_attached} //= 0;
+ $volhash->{$volid}->{is_attached} = 1
+ if !$volhash->{$volid}->{is_unused} && !defined($snapname) && !$pending;
$volhash->{$volid}->{referenced_in_snapshot}->{$snapname} = 1
if defined($snapname);
+ $volhash->{$volid}->{referenced_in_pending} = 1 if $pending;
+
my $size = $drive->{size};
$volhash->{$volid}->{size} //= $size if $size;
$volhash->{$volid}->{is_tpmstate} //= 0;
$volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0';
- $volhash->{$volid}->{is_unused} //= 0;
- $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
-
$volhash->{$volid}->{drivename} = $key if is_valid_drivename($key);
};
};
PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid);
+
+ PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $test_volid, undef, 1)
+ if defined($conf->{pending}) && $conf->{pending}->%*;
+
foreach my $snapname (keys %{$conf->{snapshots}}) {
my $snap = $conf->{snapshots}->{$snapname};
PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname);
}
my $fast_plug_option = {
+ 'description' => 1,
+ 'hookscript' => 1,
'lock' => 1,
+ 'migrate_downtime' => 1,
+ 'migrate_speed' => 1,
'name' => 1,
'onboot' => 1,
+ 'protection' => 1,
'shares' => 1,
'startup' => 1,
- 'description' => 1,
- 'protection' => 1,
- 'vmstatestorage' => 1,
- 'hookscript' => 1,
'tags' => 1,
+ 'vmstatestorage' => 1,
};
for my $opt (keys %$confdesc_cloudinit) {
$errors->{$opt} = "hotplug problem - $msg";
};
+ my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
+
+    # Bookkeeping hook invoked whenever option $opt changes from $old to $new (undef meaning
+    # "not set"). Options not listed in $cloudinit_pending_properties are ignored.
+    #
+    # For tracked options the $conf->{cloudinit} section is updated in place:
+    #  - $conf->{cloudinit}->{$opt} keeps the value recorded for the option when it first went
+    #    out of sync,
+    #  - $conf->{cloudinit}->{added} is a comma separated, sorted list of options that were
+    #    newly added since then (dropped entirely when empty).
+    my $cloudinit_record_changed = sub {
+        my ($conf, $opt, $old, $new) = @_;
+        return if !$cloudinit_pending_properties->{$opt};
+
+        my $section = ($conf->{cloudinit} //= {});
+
+        my $in_sync_value = $section->{$opt};
+        # the 'added' list is always taken out here and only written back if non-empty
+        my %is_added = map { $_ => 1 } PVE::Tools::split_list(delete($section->{added}) // '');
+
+        if (defined($in_sync_value)) {
+            # Already out of sync: further modifications or a removal keep the recorded value,
+            # only setting the option back to exactly the recorded value clears the record.
+            if (defined($new) && $new eq $in_sync_value) {
+                delete $section->{$opt};
+                delete $is_added{$opt};
+            }
+        } elsif ($is_added{$opt}) {
+            # Marked as added: changing it again is a no-op, deleting it undoes the addition.
+            delete $is_added{$opt} if defined($old) && !defined($new);
+        } elsif (defined($old)) {
+            # First modification or removal of an untracked value: remember what it was.
+            $section->{$opt} = $old;
+        } elsif (defined($new)) {
+            # A value appears where none existed before.
+            $is_added{$opt} = 1;
+        }
+        # remaining case: removing a value that never existed - nothing to record
+
+        my $added_list = join(',', sort keys %is_added);
+        $section->{added} = $added_list if length($added_list);
+    };
+
my $changes = 0;
foreach my $opt (keys %{$conf->{pending}}) { # add/change
if ($fast_plug_option->{$opt}) {
- $conf->{$opt} = $conf->{pending}->{$opt};
- delete $conf->{pending}->{$opt};
+ my $new = delete $conf->{pending}->{$opt};
+ $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
+ $conf->{$opt} = $new;
$changes = 1;
}
}
my $cgroup = PVE::QemuServer::CGroup->new($vmid);
my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
+
foreach my $opt (sort keys %$pending_delete_hash) {
next if $selection && !$selection->{$opt};
my $force = $pending_delete_hash->{$opt}->{force};
vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
} elsif ($opt =~ m/^memory$/) {
die "skip\n" if !$hotplug_features->{memory};
- PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
+ PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
} elsif ($opt eq 'cpuunits') {
$cgroup->change_cpu_shares(undef);
} elsif ($opt eq 'cpulimit') {
if (my $err = $@) {
&$add_error($opt, $err) if $err ne "skip\n";
} else {
- delete $conf->{$opt};
+ my $old = delete $conf->{$opt};
+ $cloudinit_record_changed->($conf, $opt, $old, undef);
PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
}
}
+ my $cloudinit_opt;
foreach my $opt (keys %{$conf->{pending}}) {
next if $selection && !$selection->{$opt};
my $value = $conf->{pending}->{$opt};
} elsif ($opt =~ m/^usb(\d+)$/) {
my $index = $1;
die "skip\n" if !$usb_hotplug;
- my $d = eval { parse_property_string($usbdesc->{format}, $value) };
+ my $d = eval { parse_property_string('pve-qm-usb', $value) };
my $id = $opt;
- if ($d->{host} eq 'spice') {
+ if ($d->{host} =~ m/^spice$/i) {
$id = "usbredirdev$index";
}
qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
# some changes can be done without hotplug
my $drive = parse_drive($opt, $value);
if (drive_is_cloudinit($drive)) {
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
+ $cloudinit_opt = [$opt, $drive];
+ # apply all the other changes first, then generate the cloudinit disk
+ die "skip\n";
}
vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
$vmid, $opt, $value, $arch, $machine_type);
} elsif ($opt =~ m/^memory$/) { #dimms
die "skip\n" if !$hotplug_features->{memory};
- $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
+ $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
} elsif ($opt eq 'cpuunits') {
my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
$cgroup->change_cpu_shares($new_cpuunits);
die "skip\n"; # skip non-hot-pluggable options
}
};
+ if (my $err = $@) {
+ &$add_error($opt, $err) if $err ne "skip\n";
+ } else {
+ $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
+ $conf->{$opt} = $value;
+ delete $conf->{pending}->{$opt};
+ }
+ }
+
+ if (defined($cloudinit_opt)) {
+ my ($opt, $drive) = @$cloudinit_opt;
+ my $value = $conf->{pending}->{$opt};
+ eval {
+ my $temp = {%$conf, $opt => $value};
+ PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
+ vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
+ $vmid, $opt, $value, $arch, $machine_type);
+ };
if (my $err = $@) {
&$add_error($opt, $err) if $err ne "skip\n";
} else {
# unplug xhci controller if no usb device is left
if ($usb_hotplug) {
my $has_usb = 0;
- for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
+ for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
next if !defined($conf->{"usb$i"});
$has_usb = 1;
last;
PVE::QemuConfig->write_config($vmid, $conf);
- if($hotplug_features->{cloudinit}) {
- my $pending = PVE::QemuServer::Cloudinit::get_pending_config($conf, $vmid);
- my $regenerate = undef;
- for my $item (@$pending) {
- $regenerate = 1 if defined($item->{delete}) or defined($item->{pending});
- }
- PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid) if $regenerate;
+ if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
+ PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
}
}
sub vmconfig_apply_pending {
- my ($vmid, $conf, $storecfg, $errors) = @_;
+ my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;
return if !scalar(keys %{$conf->{pending}});
PVE::QemuConfig->cleanup_pending($conf);
- my $generate_cloudnit = undef;
+ my $generate_cloudinit = $skip_cloud_init ? 0 : undef;
foreach my $opt (keys %{$conf->{pending}}) { # add/change
next if $opt eq 'delete'; # just to be sure
if (is_valid_drivename($opt)) {
my $drive = parse_drive($opt, $conf->{pending}->{$opt});
- $generate_cloudnit = 1 if drive_is_cloudinit($drive);
+ $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
}
$conf->{$opt} = delete $conf->{pending}->{$opt};
# write all changes at once to avoid unnecessary i/o
PVE::QemuConfig->write_config($vmid, $conf);
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if $generate_cloudnit;
+ if ($generate_cloudinit) {
+ if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
+ # After successful generation and if there were changes to be applied, update the
+ # config to drop the {cloudinit} entry.
+ PVE::QemuConfig->write_config($vmid, $conf);
+ }
+ }
}
sub vmconfig_update_net {
if (safe_string_ne($oldnet->{model}, $newnet->{model}) ||
safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) ||
safe_num_ne($oldnet->{queues}, $newnet->{queues}) ||
+ safe_num_ne($oldnet->{mtu}, $newnet->{mtu}) ||
!($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change
# for non online change, we try to hot-unplug
# update existing disk
# skip non hotpluggable value
- if (safe_string_ne($drive->{discard}, $old_drive->{discard}) ||
+ if (safe_string_ne($drive->{aio}, $old_drive->{aio}) ||
+ safe_string_ne($drive->{discard}, $old_drive->{discard}) ||
safe_string_ne($drive->{iothread}, $old_drive->{iothread}) ||
safe_string_ne($drive->{queues}, $old_drive->{queues}) ||
safe_string_ne($drive->{cache}, $old_drive->{cache}) ||
- safe_string_ne($drive->{ssd}, $old_drive->{ssd})) {
+ safe_string_ne($drive->{ssd}, $old_drive->{ssd}) ||
+ safe_string_ne($drive->{ro}, $old_drive->{ro})) {
die "skip\n";
}
return if !$cloudinit_drive;
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
+ if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
+ PVE::QemuConfig->write_config($vmid, $conf);
+ }
+
my $running = PVE::QemuServer::check_running($vmid);
if ($running) {
# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
# skiplock => 0/1, skip checking for config lock
# skiptemplate => 0/1, skip checking whether VM is template
-# forcemachine => to force Qemu machine (rollback/migration)
+# forcemachine => to force QEMU machine (rollback/migration)
# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
# timeout => in seconds
# paused => start VM in paused state (backup)
# don't regenerate the ISO if the VM is started as part of a live migration
# this way we can reuse the old ISO with the correct config
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;
+ if (!$migratedfrom) {
+ if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
+ # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
+ # $conf->{cloudinit}, so we could just not do this?
+ # But we do it above, so for now let's be consistent.
+ $conf = PVE::QemuConfig->load_config($vmid); # update/reload
+ }
+ }
# override offline migrated volumes, conf is out of date still
if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
my $defaults = load_defaults();
# set environment variable useful inside network script
- $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;
+ # for remote migration the config is available on the target node!
+ if (!$migrate_opts->{remote_node}) {
+ $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
+ }
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
print "Resuming suspended VM\n";
}
- my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
+ my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
$conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
my $migration_ip;
return $migration_ip;
};
- my $migrate_uri;
if ($statefile) {
if ($statefile eq 'tcp') {
- my $localip = "localhost";
+ my $migrate = $res->{migrate} = { proto => 'tcp' };
+ $migrate->{addr} = "localhost";
my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
my $nodename = nodename();
}
if ($migration_type eq 'insecure') {
- $localip = $get_migration_ip->($nodename);
- $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
+ $migrate->{addr} = $get_migration_ip->($nodename);
+ $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
}
my $pfamily = PVE::Tools::get_host_address_family($nodename);
- my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
- $migrate_uri = "tcp:${localip}:${migrate_port}";
- push @$cmd, '-incoming', $migrate_uri;
+ $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
+ $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
+ push @$cmd, '-incoming', $migrate->{uri};
push @$cmd, '-S';
} elsif ($statefile eq 'unix') {
# should be default for secure migrations as a ssh TCP forward
# tunnel is not deterministic reliable ready and fails regurarly
# to set up in time, so use UNIX socket forwards
- my $socket_addr = "/run/qemu-server/$vmid.migrate";
- unlink $socket_addr;
-
- $migrate_uri = "unix:$socket_addr";
+ my $migrate = $res->{migrate} = { proto => 'unix' };
+ $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
+ unlink $migrate->{addr};
- push @$cmd, '-incoming', $migrate_uri;
+ $migrate->{uri} = "unix:$migrate->{addr}";
+ push @$cmd, '-incoming', $migrate->{uri};
push @$cmd, '-S';
} elsif (-e $statefile) {
my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
- my $pci_devices = {}; # host pci devices
- for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
- my $dev = $conf->{"hostpci$i"} or next;
- $pci_devices->{$i} = parse_hostpci($dev);
+ my $pci_reserve_list = [];
+ for my $device (values $pci_devices->%*) {
+ next if $device->{mdev}; # we don't reserve for mdev devices
+ push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
}
- # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
- my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];
-
- # map to a flat list of pci ids
- my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];
-
# reserve all PCI IDs before actually doing anything with them
- PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);
+ PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
eval {
my $uuid;
for my $id (sort keys %$pci_devices) {
my $d = $pci_devices->{$id};
- for my $dev ($d->{pciid}->@*) {
- my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
+ my ($index) = ($id =~ m/^hostpci(\d+)$/);
+
+ my $chosen_mdev;
+ for my $dev ($d->{ids}->@*) {
+ my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
+ if ($d->{mdev}) {
+ warn $@ if $@;
+ $chosen_mdev = $info;
+ last if $chosen_mdev; # if successful, we're done
+ } else {
+ die $@ if $@;
+ }
+ }
- # nvidia grid needs the uuid of the mdev as qemu parameter
- if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
- $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
+ next if !$d->{mdev};
+ die "could not create mediated device\n" if !defined($chosen_mdev);
+
+ # nvidia grid needs the uuid of the mdev as qemu parameter
+ if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
+ if (defined($conf->{smbios1})) {
+ my $smbios_conf = parse_smbios1($conf->{smbios1});
+ $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
}
+ $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
}
}
push @$cmd, '-uuid', $uuid if defined($uuid);
PVE::Storage::activate_volumes($storecfg, $vollist);
- eval {
- run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
- };
+
+ my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
+ eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
+ eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
# Issues with the above 'stop' not being fully completed are extremely rare, a very low
# timeout should be more than enough here...
PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
if ($conf->{hugepages}) {
my $code = sub {
- my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
+ my $hotplug_features =
+ parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+ my $hugepages_topology =
+ PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
+
my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
PVE::QemuServer::Memory::hugepages_mount();
# re-reserve all PCI IDs now that we can know the actual VM PID
my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
- eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
+ eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
warn $@ if $@;
- print "migration listens on $migrate_uri\n" if $migrate_uri;
- $res->{migrate_uri} = $migrate_uri;
-
- if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
+ if (defined($res->{migrate})) {
+ print "migration listens on $res->{migrate}->{uri}\n";
+ } elsif ($statefile) {
eval { mon_cmd($vmid, "cont"); };
warn $@ if $@;
}
my $migrate_storage_uri;
# nbd_protocol_version > 0 for unix socket support
- if ($nbd_protocol_version > 0 && $migration_type eq 'secure') {
+ if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
$migrate_storage_uri = "nbd:unix:$socket_path";
+ $res->{migrate}->{unix_sockets} = [$socket_path];
} else {
my $nodename = nodename();
my $localip = $get_migration_ip->($nodename);
$migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
}
- $res->{migrate_storage_uri} = $migrate_storage_uri;
+ my $block_info = mon_cmd($vmid, "query-block");
+ $block_info = { map { $_->{device} => $_ } $block_info->@* };
foreach my $opt (sort keys %$nbd) {
my $drivestr = $nbd->{$opt}->{drivestr};
my $volid = $nbd->{$opt}->{volid};
- mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
+
+ my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
+
+ mon_cmd(
+ $vmid,
+ "block-export-add",
+ id => "drive-$opt",
+ 'node-name' => $block_node,
+ writable => JSON::true,
+ type => "nbd",
+ name => "drive-$opt", # NBD export name
+ );
+
my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
print "storage migration listens on $nbd_uri volume:$drivestr\n";
print "re-using replicated volume: $opt - $volid\n"
add_nets_bridge_fdb($conf, $vmid);
}
- mon_cmd($vmid, 'qom-set',
+ if (!defined($conf->{balloon}) || $conf->{balloon}) {
+ eval {
+ mon_cmd(
+ $vmid,
+ 'qom-set',
path => "machine/peripheral/balloon0",
property => "guest-stats-polling-interval",
- value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});
+ value => 2
+ );
+ };
+ log_warn("could not set polling interval for ballooning - $@") if $@;
+ }
if ($resume) {
print "Resumed VM, removing state\n";
# NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
# don't want to break ABI just for this two liner
my $dev_sysfs_dir = "/sys/bus/mdev/devices/$uuid";
+
+ # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
+ # out when we do it first. so wait for 10 seconds and then try it
+ if ($d->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/) {
+ sleep 10;
+ }
+
PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1") if -e $dev_sysfs_dir;
}
}
}
}
+# $nocheck is set when called as part of a migration - in this context the
+# location of the config file (source or target node) is not deterministic,
+# since migration cannot wait for pmxcfs to process the rename
sub vm_resume {
my ($vmid, $skiplock, $nocheck) = @_;
my $res = mon_cmd($vmid, 'query-status');
my $resume_cmd = 'cont';
my $reset = 0;
- my $conf = PVE::QemuConfig->load_config($vmid);
+ my $conf;
+ if ($nocheck) {
+ $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
+ if ($@) {
+ my $vmlist = PVE::Cluster::get_vmlist();
+ if (exists($vmlist->{ids}->{$vmid})) {
+ my $node = $vmlist->{ids}->{$vmid}->{node};
+ $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
+ }
+ if (!$conf) {
+ PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
+ $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
+ }
+ }
+ } else {
+ $conf = PVE::QemuConfig->load_config($vmid);
+ }
if ($res->{status}) {
return if $res->{status} eq 'running'; # job done, go home
}
if (!$nocheck) {
-
PVE::QemuConfig->check_lock($conf)
if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup'));
}
});
}
+# Checks every netN entry of $conf against PVE::GuestHelpers::check_vnet_access for $authuser,
+# passing along the entry's bridge, VLAN tag and trunks. 'root@pam' skips all checks.
+# Returns 1 when no check raised an error; errors from the helper propagate to the caller.
+sub check_bridge_access {
+    my ($rpcenv, $authuser, $conf) = @_;
+
+    if ($authuser ne 'root@pam') {
+        my @net_keys = grep { $_ =~ m/^net\d+$/ } keys $conf->%*;
+
+        # sorted, so the entries are always checked in the same order
+        for my $net_key (sort @net_keys) {
+            my $net = parse_net($conf->{$net_key});
+            my ($bridge, $tag, $trunks) = $net->@{qw(bridge tag trunks)};
+            PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
+        }
+    }
+
+    return 1;
+}
+
+
+# Checks the usbN and hostpciN entries of $conf for $user:
+#  - entries using 'host' (a device given directly) are reserved for 'root@pam', except for
+#    USB entries whose host is 'spice' (matched case-insensitively),
+#  - entries using 'mapping' require 'Mapping.Use' on /mapping/usb/<id> or /mapping/pci/<id>,
+#    verified through $rpcenv->check_full,
+#  - entries with neither are rejected.
+# Dies on the first violation found; all other config keys are ignored.
+sub check_mapping_access {
+    my ($rpcenv, $user, $conf) = @_;
+
+    for my $opt (keys $conf->%*) {
+        if ($opt =~ m/^usb\d+$/) {
+            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
+
+            if (my $host = $usb->{host}) {
+                next if $host =~ m/^spice$/i || $user eq 'root@pam';
+                die "only root can set '$opt' config for real devices\n";
+            }
+
+            die "either 'host' or 'mapping' must be set.\n" if !$usb->{mapping};
+            $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
+        } elsif ($opt =~ m/^hostpci\d+$/) {
+            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
+
+            if ($pci->{host}) {
+                die "only root can set '$opt' config for non-mapped devices\n"
+                    if $user ne 'root@pam';
+                next;
+            }
+
+            die "either 'host' or 'mapping' must be set.\n" if !$pci->{mapping};
+            $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
+        }
+    }
+}
+
+
+# Runs all permission checks needed before a restored config may be written for $user:
+# first the bridge checks (check_bridge_access), then the USB/PCI device and mapping checks
+# (check_mapping_access). Errors raised by either check propagate to the caller.
+sub check_restore_permissions {
+    my ($rpcenv, $user, $conf) = @_;
+
+    check_bridge_access($rpcenv, $user, $conf);
+
+    return check_mapping_access($rpcenv, $user, $conf);
+}
# vzdump restore implementaion
sub tar_archive_read_firstfile {
}
};
-my $restore_merge_config = sub {
+sub restore_merge_config {
my ($filename, $backup_conf_raw, $override_conf) = @_;
my $backup_conf = parse_vm_config($filename, $backup_conf_raw);
}
return $backup_conf;
-};
+}
sub scan_volids {
my ($cfg, $vmid) = @_;
$new_conf_raw .= "\nlock: create";
}
- my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
+ my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
+ check_restore_permissions($rpcenv, $user, $new_conf);
PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
die $err;
}
- my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
+ my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
+ check_restore_permissions($rpcenv, $user, $new_conf);
PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
if ($@) {
- warn "Qemu Guest Agent is not running - $@" if !$nowarn;
+ warn "QEMU Guest Agent is not running - $@" if !$nowarn;
return 0;
}
return 1;
}
sub qemu_img_convert {
- my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;
+ my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;
my $storecfg = PVE::Storage::config();
my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
if $snapname && $src_format && $src_format eq "qcow2";
push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
push @$cmd, '-T', $cachemode if defined($cachemode);
+ push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);
if ($src_is_iscsi) {
push @$cmd, '--image-opts';
}
}
+# Guard for bug #4525: drive-mirror opens the target drive with the same aio setting as the
+# source, but some storages have problems with io_uring, sometimes even leading to crashes.
+#
+# Dies if the source drive effectively uses io_uring (explicit 'aio' setting, or the storage
+# default when 'aio' is unset) while the target storage does not allow io_uring by default.
+# Does nothing unless $use_drive_mirror is set and the storage actually changes.
+my sub clone_disk_check_io_uring {
+    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;
+
+    # Only drive-mirror is affected. When staying on the same storage, assume that what works
+    # for the source also works for the target.
+    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;
+
+    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
+    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
+
+    my $cache_direct = drive_uses_cache_direct($src_drive);
+
+    # an explicit aio setting on the drive wins over the storage's default
+    my $src_uses_io_uring = $src_drive->{aio}
+        ? $src_drive->{aio} eq 'io_uring'
+        : storage_allows_io_uring_default($src_scfg, $cache_direct);
+
+    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
+        if $src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct);
+}
+
sub clone_disk {
my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;
$newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
push @$newvollist, $newvolid;
} else {
-
- my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
- $storeid = $storage if $storage;
+ my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
+ my $storeid = $storage || $src_storeid;
my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);
$dst_format = 'raw';
$size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
} else {
- ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
+ clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);
+
+ $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
}
$newvolid = PVE::Storage::vdisk_alloc(
$storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
$completion, $qga, $bwlimit);
} else {
- # TODO: handle bwlimits
if ($dst_drivename eq 'efidisk0') {
# the relevant data on the efidisk may be smaller than the source
# e.g. on RBD/ZFS, so we use dd to copy only the amount
push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
run_command($cmd);
} else {
- qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
+ qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
}
}
}
no_data_clone:
- my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };
+ my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };
my $disk = dclone($drive);
delete $disk->{format};
$efidisk //= $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
- die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
return -s $ovmf_vars;
}
my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;
my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
- die "EFI vars default image not found\n" if ! -f $ovmf_vars;
my $vars_size_b = -s $ovmf_vars;
my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');
PVE::Storage::activate_volumes($storecfg, [$volid]);
qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);
- my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);
+ my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);
return ($volid, $size/1024);
}
for my $opt (keys %$conf) {
next if $opt !~ m/^net(\d+)$/;
my $iface = "tap${vmid}i$1";
+ # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
+ my $net = parse_net($conf->{$opt}, 1) or next;
+
+ my $mac = $net->{macaddr};
+ if (!$mac) {
+ log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
+ if !file_read_firstline("/sys/class/net/$iface/brport/learning");
+ next;
+ }
+
+ my $bridge = $net->{bridge};
+ if (!$bridge) {
+ log_warn("Interface '$iface' not attached to any bridge.");
+ next;
+ }
+ if ($have_sdn) {
+ PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
+ } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
+ PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
+ }
+ }
+}
+
+sub del_nets_bridge_fdb {
+ my ($conf, $vmid) = @_;
+
+ for my $opt (keys %$conf) {
+ next if $opt !~ m/^net(\d+)$/;
+ my $iface = "tap${vmid}i$1";
+
my $net = parse_net($conf->{$opt}) or next;
my $mac = $net->{macaddr} or next;
+ my $bridge = $net->{bridge};
if ($have_sdn) {
- PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $net->{bridge}, $net->{firewall});
- } else {
- PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
+ PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
+ } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
+ PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
}
}
}