X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=9811eba19106f570ba28aac8d40d4b2e8f6d58b9;hb=c351659d873a25d287d01fc65060b146414cceb1;hp=6cbaf878e99b0d417eea27d34aff65621e280cdb;hpb=58542139532e2878fd8d336643672d3aab7908b2;p=qemu-server.git diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 6cbaf87..9811eba 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -34,6 +34,8 @@ use PVE::DataCenterConfig; use PVE::Exception qw(raise raise_param_exc); use PVE::Format qw(render_duration render_bytes); use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne); +use PVE::Mapping::PCI; +use PVE::Mapping::USB; use PVE::INotify; use PVE::JSONSchema qw(get_standard_option parse_property_string); use PVE::ProcFSTools; @@ -47,16 +49,17 @@ use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_for use PVE::QMPClient; use PVE::QemuConfig; -use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version); +use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version); use PVE::QemuServer::Cloudinit; use PVE::QemuServer::CGroup; use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options); use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive); use PVE::QemuServer::Machine; -use PVE::QemuServer::Memory; +use PVE::QemuServer::Memory qw(get_current_memory); use PVE::QemuServer::Monitor qw(mon_cmd); use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci); -use PVE::QemuServer::USB qw(parse_usb_device); +use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel); +use PVE::QemuServer::USB; my $have_sdn; eval { @@ -83,6 +86,9 @@ my $OVMF = { "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd", "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd", ], + # FIXME: These are legacy 2MB-sized images that modern OVMF doesn't supports to build + # anymore. 
how can we deprecate this sanely without breaking existing instances, or using + # older backups and snapshots? default => [ "$EDK2_FW_BASE/OVMF_CODE.fd", "$EDK2_FW_BASE/OVMF_VARS.fd", @@ -347,11 +353,9 @@ my $confdesc = { }, memory => { optional => 1, - type => 'integer', - description => "Amount of RAM for the VM in MiB. This is the maximum available memory when" - ." you use the balloon device.", - minimum => 16, - default => 512, + type => 'string', + description => "Memory properties.", + format => $PVE::QemuServer::Memory::memory_fmt }, balloon => { optional => 1, @@ -758,6 +762,7 @@ my $cicustom_fmt = { }; PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt); +# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu my $confdesc_cloudinit = { citype => { optional => 1, @@ -783,7 +788,8 @@ my $confdesc_cloudinit = { ciupgrade => { optional => 1, type => 'boolean', - description => 'cloud-init: do an automatic package upgrade after the first boot.' 
+ description => 'cloud-init: do an automatic package upgrade after the first boot.', + default => 1, }, cicustom => { optional => 1, @@ -835,48 +841,12 @@ while (my ($k, $v) = each %$confdesc) { PVE::JSONSchema::register_standard_option("pve-qm-$k", $v); } -my $MAX_USB_DEVICES = 14; my $MAX_NETS = 32; my $MAX_SERIAL_PORTS = 4; my $MAX_PARALLEL_PORTS = 3; -my $MAX_NUMA = 8; -my $numa_fmt = { - cpus => { - type => "string", - pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/, - description => "CPUs accessing this NUMA node.", - format_description => "id[-id];...", - }, - memory => { - type => "number", - description => "Amount of memory this NUMA node provides.", - optional => 1, - }, - hostnodes => { - type => "string", - pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/, - description => "Host NUMA nodes to use.", - format_description => "id[-id];...", - optional => 1, - }, - policy => { - type => 'string', - enum => [qw(preferred bind interleave)], - description => "NUMA allocation policy.", - optional => 1, - }, -}; -PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt); -my $numadesc = { - optional => 1, - type => 'string', format => $numa_fmt, - description => "NUMA topology.", -}; -PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc); - -for (my $i = 0; $i < $MAX_NUMA; $i++) { - $confdesc->{"numa$i"} = $numadesc; +for (my $i = 0; $i < $PVE::QemuServer::Memory::MAX_NUMA; $i++) { + $confdesc->{"numa$i"} = $PVE::QemuServer::Memory::numadesc; } my $nic_model_list = [ @@ -1081,44 +1051,6 @@ sub verify_volume_id_or_absolute_path { return $volid; } -my $usb_fmt = { - host => { - default_key => 1, - type => 'string', format => 'pve-qm-usb-device', - format_description => 'HOSTUSBDEVICE|spice', - description => < { - optional => 1, - type => 'boolean', - description => "Specifies whether if given host option is a USB3 device or port." - ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag" - ." 
is irrelevant (all devices are plugged into a xhci controller).", - default => 0, - }, -}; - -my $usbdesc = { - optional => 1, - type => 'string', format => $usb_fmt, - description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype" - ." l26 or windows > 7, n can be up to 14).", -}; -PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc); - my $serialdesc = { optional => 1, type => 'string', @@ -1167,8 +1099,8 @@ for my $key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) { $confdesc->{$key} = $PVE::QemuServer::Drive::drivedesc_hash->{$key}; } -for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) { - $confdesc->{"usb$i"} = $usbdesc; +for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) { + $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc; } my $boot_fmt = { @@ -1557,6 +1489,17 @@ sub print_drivedevice_full { my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2; my $controller = int($drive->{index} / $maxdev); my $unit = $drive->{index} % $maxdev; + + # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled + # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than + # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to + # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they + # were before. Move odd ones up by 2 where they don't clash. + if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') { + $controller += 2 * ($unit % 2); + $unit = 0; + } + my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? 
"cd" : "hd"; $device = "ide-$devicetype"; @@ -1798,7 +1741,7 @@ sub print_netdevice_full { } if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio'){ - $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024"; + $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256"; } $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ; @@ -1954,29 +1897,6 @@ sub print_vga_device { return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}"; } -sub parse_number_sets { - my ($set) = @_; - my $res = []; - foreach my $part (split(/;/, $set)) { - if ($part =~ /^\s*(\d+)(?:-(\d+))?\s*$/) { - die "invalid range: $part ($2 < $1)\n" if defined($2) && $2 < $1; - push @$res, [ $1, $2 ]; - } else { - die "invalid range: $part\n"; - } - } - return $res; -} - -sub parse_numa { - my ($data) = @_; - - my $res = parse_property_string($numa_fmt, $data); - $res->{cpus} = parse_number_sets($res->{cpus}) if defined($res->{cpus}); - $res->{hostnodes} = parse_number_sets($res->{hostnodes}) if defined($res->{hostnodes}); - return $res; -} - # netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate= sub parse_net { my ($data, $disable_mac_autogen) = @_; @@ -2244,17 +2164,6 @@ sub qemu_created_version_fixups { return; } -PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device); -sub verify_usb_device { - my ($value, $noerr) = @_; - - return $value if parse_usb_device($value); - - return if $noerr; - - die "unable to parse usb device\n"; -} - # add JSON properties for create and set function sub json_config_properties { my ($prop, $with_disk_alloc) = @_; @@ -2700,6 +2609,28 @@ sub check_local_resources { my ($conf, $noerr) = @_; my @loc_res = (); + my $mapped_res = []; + + my $nodelist = PVE::Cluster::get_nodelist(); + my $pci_map = PVE::Mapping::PCI::config(); + my $usb_map = PVE::Mapping::USB::config(); + + my $missing_mappings_by_node = { map { $_ => [] } @$nodelist }; + + my $add_missing_mapping = sub { + my ($type, $key, $id) = @_; + for my $node (@$nodelist) { + 
my $entry; + if ($type eq 'pci') { + $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node); + } elsif ($type eq 'usb') { + $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node); + } + if (!scalar($entry->@*)) { + push @{$missing_mappings_by_node->{$node}}, $key; + } + } + }; push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax @@ -2707,7 +2638,21 @@ sub check_local_resources { push @loc_res, "ivshmem" if $conf->{ivshmem}; foreach my $k (keys %$conf) { - next if $k =~ m/^usb/ && ($conf->{$k} =~ m/^spice(?![^,])/); + if ($k =~ m/^usb/) { + my $entry = parse_property_string('pve-qm-usb', $conf->{$k}); + next if $entry->{host} =~ m/^spice$/i; + if ($entry->{mapping}) { + $add_missing_mapping->('usb', $k, $entry->{mapping}); + push @$mapped_res, $k; + } + } + if ($k =~ m/^hostpci/) { + my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k}); + if ($entry->{mapping}) { + $add_missing_mapping->('pci', $k, $entry->{mapping}); + push @$mapped_res, $k; + } + } # sockets are safe: they will recreated be on the target side post-migrate next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket'); push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/; @@ -2715,7 +2660,7 @@ sub check_local_resources { die "VM uses local resources\n" if scalar @loc_res && !$noerr; - return \@loc_res; + return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res; } # check if used storages are available on all nodes (use by migrate) @@ -2959,8 +2904,7 @@ sub vmstatus { $d->{cpus} = $conf->{vcpus} if $conf->{vcpus}; $d->{name} = $conf->{name} || "VM $vmid"; - $d->{maxmem} = $conf->{memory} ? 
$conf->{memory}*(1024*1024) - : $defaults->{memory}*(1024*1024); + $d->{maxmem} = get_current_memory($conf->{memory})*(1024*1024); if ($conf->{balloon}) { $d->{balloon_min} = $conf->{balloon}*(1024*1024); @@ -3402,9 +3346,13 @@ sub get_ovmf_files($$$) { or die "no OVMF images known for architecture '$arch'\n"; my $type = 'default'; - if ($arch ne "aarch64" && defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') { - $type = $smm ? "4m" : "4m-no-smm"; - $type .= '-ms' if $efidisk->{'pre-enrolled-keys'}; + if ($arch eq 'x86_64') { + if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') { + $type = $smm ? "4m" : "4m-no-smm"; + $type .= '-ms' if $efidisk->{'pre-enrolled-keys'}; + } else { + # TODO: log_warn about use of legacy images for x86_64 with Proxmox VE 9 + } } my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*; @@ -3740,7 +3688,7 @@ sub config_to_command { # add usb controllers my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers( - $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version); + $conf, $bridges, $arch, $machine_type, $machine_version); push @$devices, @usbcontrollers if @usbcontrollers; my $vga = parse_vga($conf->{vga}); @@ -3774,15 +3722,15 @@ sub config_to_command { my $bootorder = device_bootorder($conf); # host pci device passthrough - my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices( - $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder); + my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices( + $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder); # usb devices my $usb_dev_features = {}; $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0); my @usbdevices = PVE::QemuServer::USB::get_usb_devices( - $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version); + 
$conf, $usb_dev_features, $bootorder, $machine_version); push @$devices, @usbdevices if @usbdevices; # serial devices @@ -3822,7 +3770,9 @@ sub config_to_command { push @$devices, @$audio_devs; } - add_tpm_device($vmid, $devices, $conf); + # Add a TPM only if the VM is not a template, + # to support backing up template VMs even if the TPM disk is write-protected. + add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf)); my $sockets = 1; $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused @@ -3903,7 +3853,7 @@ sub config_to_command { } PVE::QemuServer::Memory::config( - $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd); + $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd); push @$cmd, '-S' if $conf->{freeze}; @@ -4194,7 +4144,7 @@ sub config_to_command { push @$cmd, @$aa; } - return wantarray ? ($cmd, $vollist, $spice_port) : $cmd; + return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd; } sub check_rng_source { @@ -4432,21 +4382,6 @@ sub qemu_spice_usbredir_chardev_add { )); } -sub qemu_deviceadd { - my ($vmid, $devicefull) = @_; - - $devicefull = "driver=".$devicefull; - my %options = split(/[=,]/, $devicefull); - - mon_cmd($vmid, "device_add" , %options); -} - -sub qemu_devicedel { - my ($vmid, $deviceid) = @_; - - my $ret = mon_cmd($vmid, "device_del", id => $deviceid); -} - sub qemu_iothread_add { my ($vmid, $deviceid, $device) = @_; @@ -4465,22 +4400,6 @@ sub qemu_iothread_del { } } -sub qemu_objectadd { - my ($vmid, $objectid, $qomtype) = @_; - - mon_cmd($vmid, "object-add", id => $objectid, "qom-type" => $qomtype); - - return 1; -} - -sub qemu_objectdel { - my ($vmid, $objectid) = @_; - - mon_cmd($vmid, "object-del", id => $objectid); - - return 1; -} - sub qemu_driveadd { my ($storecfg, $vmid, $device) = @_; @@ -4653,12 +4572,8 @@ sub qemu_usb_hotplug { qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr)); } - # 
print_usbdevice_full expects the parsed device - my $d = parse_usb_device($device->{host}); - $d->{usb3} = $device->{usb3}; - # add the new one - vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $d, $arch, $machine_type); + vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type); } sub qemu_cpu_hotplug { @@ -4860,7 +4775,7 @@ sub foreach_volid { my $volhash = {}; my $test_volid = sub { - my ($key, $drive, $snapname) = @_; + my ($key, $drive, $snapname, $pending) = @_; my $volid = $drive->{file}; return if !$volid; @@ -4875,12 +4790,18 @@ sub foreach_volid { $volhash->{$volid}->{shared} //= 0; $volhash->{$volid}->{shared} = 1 if $drive->{shared}; - $volhash->{$volid}->{referenced_in_config} //= 0; - $volhash->{$volid}->{referenced_in_config} = 1 if !defined($snapname); + $volhash->{$volid}->{is_unused} //= 0; + $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/; + + $volhash->{$volid}->{is_attached} //= 0; + $volhash->{$volid}->{is_attached} = 1 + if !$volhash->{$volid}->{is_unused} && !defined($snapname) && !$pending; $volhash->{$volid}->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname); + $volhash->{$volid}->{referenced_in_pending} = 1 if $pending; + my $size = $drive->{size}; $volhash->{$volid}->{size} //= $size if $size; @@ -4890,9 +4811,6 @@ sub foreach_volid { $volhash->{$volid}->{is_tpmstate} //= 0; $volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0'; - $volhash->{$volid}->{is_unused} //= 0; - $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/; - $volhash->{$volid}->{drivename} = $key if is_valid_drivename($key); }; @@ -4902,6 +4820,10 @@ sub foreach_volid { }; PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid); + + PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $test_volid, undef, 1) + if defined($conf->{pending}) && $conf->{pending}->%*; + foreach my $snapname (keys %{$conf->{snapshots}}) { my $snap = $conf->{snapshots}->{$snapname}; 
PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname); @@ -5046,7 +4968,7 @@ sub vmconfig_hotplug_pending { my $force = $pending_delete_hash->{$opt}->{force}; eval { if ($opt eq 'hotplug') { - die "skip\n" if ($conf->{hotplug} =~ /memory/); + die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/); } elsif ($opt eq 'tablet') { die "skip\n" if !$hotplug_features->{usb}; if ($defaults->{tablet}) { @@ -5069,7 +4991,7 @@ sub vmconfig_hotplug_pending { # enable balloon device is not hotpluggable die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0; # here we reset the ballooning value to memory - my $balloon = $conf->{memory} || $defaults->{memory}; + my $balloon = get_current_memory($conf->{memory}); mon_cmd($vmid, "balloon", value => $balloon*1024*1024); } elsif ($fast_plug_option->{$opt}) { # do nothing @@ -5082,7 +5004,7 @@ sub vmconfig_hotplug_pending { vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); } elsif ($opt =~ m/^memory$/) { die "skip\n" if !$hotplug_features->{memory}; - PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults); + PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf); } elsif ($opt eq 'cpuunits') { $cgroup->change_cpu_shares(undef); } elsif ($opt eq 'cpulimit') { @@ -5107,6 +5029,7 @@ sub vmconfig_hotplug_pending { eval { if ($opt eq 'hotplug') { die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/); + die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/); } elsif ($opt eq 'tablet') { die "skip\n" if !$hotplug_features->{usb}; if ($value == 1) { @@ -5120,9 +5043,9 @@ sub vmconfig_hotplug_pending { } elsif ($opt =~ m/^usb(\d+)$/) { my $index = $1; die "skip\n" if !$usb_hotplug; - my $d = eval { parse_property_string($usbdesc->{format}, $value) }; + my $d = eval { parse_property_string('pve-qm-usb', $value) }; my $id = $opt; - if ($d->{host} eq 'spice') { + if ($d->{host} =~ m/^spice$/i) { $id = 
"usbredirdev$index"; } qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type); @@ -5137,7 +5060,8 @@ sub vmconfig_hotplug_pending { # allow manual ballooning if shares is set to zero if ((defined($conf->{shares}) && ($conf->{shares} == 0))) { - my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory}; + my $memory = get_current_memory($conf->{memory}); + my $balloon = $conf->{pending}->{balloon} || $memory; mon_cmd($vmid, "balloon", value => $balloon*1024*1024); } } elsif ($opt =~ m/^net(\d+)$/) { @@ -5157,7 +5081,7 @@ sub vmconfig_hotplug_pending { $vmid, $opt, $value, $arch, $machine_type); } elsif ($opt =~ m/^memory$/) { #dimms die "skip\n" if !$hotplug_features->{memory}; - $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value); + $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value); } elsif ($opt eq 'cpuunits') { my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp $cgroup->change_cpu_shares($new_cpuunits); @@ -5199,7 +5123,7 @@ sub vmconfig_hotplug_pending { # unplug xhci controller if no usb device is left if ($usb_hotplug) { my $has_usb = 0; - for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) { + for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) { next if !defined($conf->{"usb$i"}); $has_usb = 1; last; @@ -5562,9 +5486,11 @@ sub vm_migrate_get_nbd_disks { my $scfg = PVE::Storage::storage_config($storecfg, $storeid); return if $scfg->{shared}; + my $format = qemu_img_format($scfg, $volname); + # replicated disks re-use existing state via bitmap my $use_existing = $replicated_volumes->{$volid} ? 
1 : 0; - $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing]; + $local_volumes->{$ds} = [$volid, $storeid, $drive, $use_existing, $format]; }); return $local_volumes; } @@ -5575,7 +5501,7 @@ sub vm_migrate_alloc_nbd_disks { my $nbd = {}; foreach my $opt (sort keys %$source_volumes) { - my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}}; + my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}}; if ($use_existing) { $nbd->{$opt}->{drivestr} = print_drive($drive); @@ -5584,29 +5510,13 @@ sub vm_migrate_alloc_nbd_disks { next; } - # storage mapping + volname = regular migration - # storage mapping + format = remote migration + $storeid = PVE::JSONSchema::map_id($storagemap, $storeid); + # order of precedence, filtered by whether storage supports it: # 1. explicit requested format - # 2. format of current volume - # 3. default format of storage - if (!$storagemap->{identity}) { - $storeid = PVE::JSONSchema::map_id($storagemap, $storeid); - my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); - if (!$format || !grep { $format eq $_ } @$validFormats) { - if ($volname) { - my $scfg = PVE::Storage::storage_config($storecfg, $storeid); - my $fileFormat = qemu_img_format($scfg, $volname); - $format = $fileFormat - if grep { $fileFormat eq $_ } @$validFormats; - } - $format //= $defFormat; - } - } else { - # can't happen for remote migration, so $volname is always defined - my $scfg = PVE::Storage::storage_config($storecfg, $storeid); - $format = qemu_img_format($scfg, $volname); - } + # 2. 
default format of storage + my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); + $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*; my $size = $drive->{size} / 1024; my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size); @@ -5753,7 +5663,7 @@ sub vm_start_nolock { print "Resuming suspended VM\n"; } - my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, + my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'}); my $migration_ip; @@ -5836,40 +5746,47 @@ sub vm_start_nolock { push @$cmd, '-S'; } - my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume); + my $memory = get_current_memory($conf->{memory}); + my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume); - my $pci_devices = {}; # host pci devices - for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) { - my $dev = $conf->{"hostpci$i"} or next; - $pci_devices->{$i} = parse_hostpci($dev); + my $pci_reserve_list = []; + for my $device (values $pci_devices->%*) { + next if $device->{mdev}; # we don't reserve for mdev devices + push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*; } - # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment - my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ]; - - # map to a flat list of pci ids - my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ]; - # reserve all PCI IDs before actually doing anything with them - PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout); + PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout); eval { my $uuid; for my $id (sort keys %$pci_devices) { my $d = 
$pci_devices->{$id}; - for my $dev ($d->{pciid}->@*) { - my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev}); - - # nvidia grid needs the qemu parameter '-uuid' set - # use smbios uuid or mdev uuid as fallback for that - if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') { - if (defined($conf->{smbios1})) { - my $smbios_conf = parse_smbios1($conf->{smbios1}); - $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid}); - } - $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id) if !defined($uuid); + my ($index) = ($id =~ m/^hostpci(\d+)$/); + + my $chosen_mdev; + for my $dev ($d->{ids}->@*) { + my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) }; + if ($d->{mdev}) { + warn $@ if $@; + $chosen_mdev = $info; + last if $chosen_mdev; # if successful, we're done + } else { + die $@ if $@; } } + + next if !$d->{mdev}; + die "could not create mediated device\n" if !defined($chosen_mdev); + + # nvidia grid needs the uuid of the mdev as qemu parameter + if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) { + if (defined($conf->{smbios1})) { + my $smbios_conf = parse_smbios1($conf->{smbios1}); + $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid}); + } + $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid); + } } push @$cmd, '-uuid', $uuid if defined($uuid); }; @@ -5881,9 +5798,10 @@ sub vm_start_nolock { PVE::Storage::activate_volumes($storecfg, $vollist); - eval { - run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{}); - }; + + my %silence_std_outs = (outfunc => sub {}, errfunc => sub {}); + eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) }; + eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) }; # Issues with the above 'stop' not being fully completed are extremely rare, a very low # timeout should be more 
than enough here... PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20); @@ -5926,7 +5844,7 @@ sub vm_start_nolock { PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties); my $tpmpid; - if (my $tpm = $conf->{tpmstate0}) { + if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) { # start the TPM emulator so QEMU can connect on start $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom); } @@ -5983,7 +5901,7 @@ sub vm_start_nolock { # re-reserve all PCI IDs now that we can know the actual VM PID my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid); - eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) }; + eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) }; warn $@ if $@; if (defined($res->{migrate})) { @@ -6178,9 +6096,7 @@ sub cleanup_pci_devices { # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error # out when we do it first. 
so wait for 10 seconds and then try it - my $pciid = $d->{pciid}->[0]->{id}; - my $info = PVE::SysFSTools::pci_device_info("$pciid"); - if ($info->{vendor} eq '10de') { + if ($d->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/) { sleep 10; } @@ -6527,6 +6443,39 @@ sub check_bridge_access { return 1; }; +sub check_mapping_access { + my ($rpcenv, $user, $conf) = @_; + + for my $opt (keys $conf->%*) { + if ($opt =~ m/^usb\d+$/) { + my $device = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt}); + if (my $host = $device->{host}) { + die "only root can set '$opt' config for real devices\n" + if $host !~ m/^spice$/i && $user ne 'root@pam'; + } elsif ($device->{mapping}) { + $rpcenv->check_full($user, "/mapping/usb/$device->{mapping}", ['Mapping.Use']); + } else { + die "either 'host' or 'mapping' must be set.\n"; + } + } elsif ($opt =~ m/^hostpci\d+$/) { + my $device = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt}); + if ($device->{host}) { + die "only root can set '$opt' config for non-mapped devices\n" if $user ne 'root@pam'; + } elsif ($device->{mapping}) { + $rpcenv->check_full($user, "/mapping/pci/$device->{mapping}", ['Mapping.Use']); + } else { + die "either 'host' or 'mapping' must be set.\n"; + } + } + } +}; + +sub check_restore_permissions { + my ($rpcenv, $user, $conf) = @_; + + check_bridge_access($rpcenv, $user, $conf); + check_mapping_access($rpcenv, $user, $conf); +} # vzdump restore implementaion sub tar_archive_read_firstfile { @@ -6843,7 +6792,7 @@ my $restore_destroy_volumes = sub { } }; -my $restore_merge_config = sub { +sub restore_merge_config { my ($filename, $backup_conf_raw, $override_conf) = @_; my $backup_conf = parse_vm_config($filename, $backup_conf_raw); @@ -6852,7 +6801,7 @@ my $restore_merge_config = sub { } return $backup_conf; -}; +} sub scan_volids { my ($cfg, $vmid) = @_; @@ -7170,8 +7119,8 @@ sub restore_proxmox_backup_archive { $new_conf_raw .= "\nlock: create"; } - my $new_conf = 
$restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf}); - check_bridge_access($rpcenv, $user, $new_conf); + my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf}); + check_restore_permissions($rpcenv, $user, $new_conf); PVE::QemuConfig->write_config($vmid, $new_conf); eval { rescan($vmid, 1); }; @@ -7336,9 +7285,6 @@ sub restore_vma_archive { $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]); - my $oldtimeout; - my $timeout = 5; - my $devinfo = {}; # info about drives included in backup my $virtdev_hash = {}; # info about allocated drives @@ -7432,6 +7378,8 @@ sub restore_vma_archive { $fh->close(); }; + my $oldtimeout; + eval { # enable interrupts local $SIG{INT} = @@ -7441,7 +7389,7 @@ sub restore_vma_archive { local $SIG{PIPE} = sub { die "interrupted by signal\n"; }; local $SIG{ALRM} = sub { die "got timeout\n"; }; - $oldtimeout = alarm($timeout); + $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one my $parser = sub { my $line = shift; @@ -7453,14 +7401,11 @@ sub restore_vma_archive { $devinfo->{$devname} = { size => $size, dev_id => $dev_id }; } elsif ($line =~ m/^CTIME: /) { # we correctly received the vma config, so we can disable - # the timeout now for disk allocation (set to 10 minutes, so - # that we always timeout if something goes wrong) - alarm(600); + # the timeout now for disk allocation + alarm($oldtimeout || 0); + $oldtimeout = undef; &$print_devmap(); print $fifofh "done\n"; - my $tmp = $oldtimeout || 0; - $oldtimeout = undef; - alarm($tmp); close($fifofh); $fifofh = undef; } @@ -7484,8 +7429,8 @@ sub restore_vma_archive { die $err; } - my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf}); - check_bridge_access($rpcenv, $user, $new_conf); + my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf}); + check_restore_permissions($rpcenv, $user, $new_conf); 
PVE::QemuConfig->write_config($vmid, $new_conf); eval { rescan($vmid, 1); }; @@ -8330,7 +8275,7 @@ sub generate_smbios1_uuid { sub nbd_stop { my ($vmid) = @_; - mon_cmd($vmid, 'nbd-server-stop'); + mon_cmd($vmid, 'nbd-server-stop', timeout => 25); } sub create_reboot_request { @@ -8560,13 +8505,17 @@ sub complete_migration_storage { } sub vm_is_paused { - my ($vmid) = @_; + my ($vmid, $include_suspended) = @_; my $qmpstatus = eval { PVE::QemuConfig::assert_config_exists_on_node($vmid); mon_cmd($vmid, "query-status"); }; warn "$@\n" if $@; - return $qmpstatus && $qmpstatus->{status} eq "paused"; + return $qmpstatus && ( + $qmpstatus->{status} eq "paused" || + $qmpstatus->{status} eq "prelaunch" || + ($include_suspended && $qmpstatus->{status} eq "suspended") + ); } sub check_volume_storage_type { @@ -8604,9 +8553,9 @@ sub add_nets_bridge_fdb { next; } if ($have_sdn) { - PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall}); + PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge); } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now - PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall}); + PVE::Network::add_bridge_fdb($iface, $mac); } } } @@ -8623,9 +8572,9 @@ sub del_nets_bridge_fdb { my $bridge = $net->{bridge}; if ($have_sdn) { - PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall}); + PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge); } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now - PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall}); + PVE::Network::del_bridge_fdb($iface, $mac); } } }