X-Git-Url: https://git.proxmox.com/?p=qemu-server.git;a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=28e630d30b21891a6c8203c3809c4f436985fc2e;hp=0fa43a74c71d737b8a0e1ff84fb134212e584eb0;hb=HEAD;hpb=621edb2b65d90afcd37eba56af63d5a0f93c97d3 diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 0fa43a7..5df0c96 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -34,6 +34,7 @@ use PVE::DataCenterConfig; use PVE::Exception qw(raise raise_param_exc); use PVE::Format qw(render_duration render_bytes); use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne); +use PVE::HA::Config; use PVE::Mapping::PCI; use PVE::Mapping::USB; use PVE::INotify; @@ -49,20 +50,22 @@ use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_for use PVE::QMPClient; use PVE::QemuConfig; -use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version); +use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version); use PVE::QemuServer::Cloudinit; use PVE::QemuServer::CGroup; -use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options); +use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options get_cpu_bitness is_native_arch); use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive); use PVE::QemuServer::Machine; -use PVE::QemuServer::Memory; +use PVE::QemuServer::Memory qw(get_current_memory); use PVE::QemuServer::Monitor qw(mon_cmd); use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci); +use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel); use PVE::QemuServer::USB; my $have_sdn; eval { require PVE::Network::SDN::Zones; + require PVE::Network::SDN::Vnets; $have_sdn = 1; }; @@ -85,6 +88,9 @@ my $OVMF = { "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd", "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd", ], + # FIXME: These are legacy 2MB-sized images that modern OVMF doesn't supports to build + # anymore. how can we deperacate this sanely without breaking existing instances, or using + # older backups and snapshot? default => [ "$EDK2_FW_BASE/OVMF_CODE.fd", "$EDK2_FW_BASE/OVMF_VARS.fd", @@ -118,14 +124,6 @@ PVE::JSONSchema::register_standard_option('pve-qm-stateuri', { optional => 1, }); -PVE::JSONSchema::register_standard_option('pve-qemu-machine', { - description => "Specifies the QEMU machine type.", - type => 'string', - pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)', - maxLength => 40, - optional => 1, -}); - # FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way my $nodename_cache; sub nodename { @@ -195,6 +193,13 @@ my $vga_fmt = { minimum => 4, maximum => 512, }, + clipboard => { + description => 'Enable a specific clipboard. If not set, depending on the display type the' + .' SPICE one will be added. Migration with VNC clipboard is not yet supported!', + type => 'string', + enum => ['vnc'], + optional => 1, + }, }; my $ivshmem_fmt = { @@ -349,11 +354,9 @@ my $confdesc = { }, memory => { optional => 1, - type => 'integer', - description => "Amount of RAM for the VM in MiB. This is the maximum available memory when" - ." you use the balloon device.", - minimum => 16, - default => 512, + type => 'string', + description => "Memory properties.", + format => $PVE::QemuServer::Memory::memory_fmt }, balloon => { optional => 1, @@ -401,6 +404,7 @@ my $confdesc = { ostype => { optional => 1, type => 'string', + # NOTE: When extending, also consider extending `%guest_types` in `Import/ESXi.pm`. enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)], description => "Specify guest operating system.", verbose_description => < { optional => 1, type => 'number', - description => "Set maximum tolerated downtime (in seconds) for migrations.", + description => "Set maximum tolerated downtime (in seconds) for migrations. Should the" + ." migration not be able to converge in the very end, because too much newly dirtied" + ." RAM needs to be transferred, the limit will be increased automatically step-by-step" + ." until migration can converge.", minimum => 0, default => 0.1, }, @@ -760,6 +767,7 @@ my $cicustom_fmt = { }; PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt); +# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu my $confdesc_cloudinit = { citype => { optional => 1, @@ -785,7 +793,8 @@ my $confdesc_cloudinit = { ciupgrade => { optional => 1, type => 'boolean', - description => 'cloud-init: do an automatic package upgrade after the first boot.' + description => 'cloud-init: do an automatic package upgrade after the first boot.', + default => 1, }, cicustom => { optional => 1, @@ -840,44 +849,9 @@ while (my ($k, $v) = each %$confdesc) { my $MAX_NETS = 32; my $MAX_SERIAL_PORTS = 4; my $MAX_PARALLEL_PORTS = 3; -my $MAX_NUMA = 8; - -my $numa_fmt = { - cpus => { - type => "string", - pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/, - description => "CPUs accessing this NUMA node.", - format_description => "id[-id];...", - }, - memory => { - type => "number", - description => "Amount of memory this NUMA node provides.", - optional => 1, - }, - hostnodes => { - type => "string", - pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/, - description => "Host NUMA nodes to use.", - format_description => "id[-id];...", - optional => 1, - }, - policy => { - type => 'string', - enum => [qw(preferred bind interleave)], - description => "NUMA allocation policy.", - optional => 1, - }, -}; -PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt); -my $numadesc = { - optional => 1, - type => 'string', format => $numa_fmt, - description => "NUMA topology.", -}; -PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc); -for (my $i = 0; $i < $MAX_NUMA; $i++) { - $confdesc->{"numa$i"} = $numadesc; +for (my $i = 0; $i < $PVE::QemuServer::Memory::MAX_NUMA; $i++) { + $confdesc->{"numa$i"} = $PVE::QemuServer::Memory::numadesc; } my $nic_model_list = [ @@ -1373,64 +1347,19 @@ sub pve_verify_hotplug_features { die "unable to parse hotplug option\n"; } -sub scsi_inquiry { - my($fh, $noerr) = @_; - - my $SG_IO = 0x2285; - my $SG_GET_VERSION_NUM = 0x2282; - - my $versionbuf = "\x00" x 8; - my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf); - if (!$ret) { - die "scsi ioctl SG_GET_VERSION_NUM failoed - $!\n" if !$noerr; - return; - } - my $version = unpack("I", $versionbuf); - if ($version < 30000) { - die "scsi generic interface too old\n" if !$noerr; - return; - } - - my $buf = "\x00" x 36; - my $sensebuf = "\x00" x 8; - my $cmd = pack("C x3 C x1", 0x12, 36); - - # see /usr/include/scsi/sg.h - my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I"; - - my $packet = pack( - $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000 - ); +sub assert_clipboard_config { + my ($vga) = @_; - $ret = ioctl($fh, $SG_IO, $packet); - if (!$ret) { - die "scsi ioctl SG_IO failed - $!\n" if !$noerr; - return; - } + my $clipboard_regex = qr/^(std|cirrus|vmware|virtio|qxl)/; - my @res = unpack($sg_io_hdr_t, $packet); - if ($res[17] || $res[18]) { - die "scsi ioctl SG_IO status error - $!\n" if !$noerr; - return; + if ( + $vga->{'clipboard'} + && $vga->{'clipboard'} eq 'vnc' + && $vga->{type} + && $vga->{type} !~ $clipboard_regex + ) { + die "vga type $vga->{type} is not compatible with VNC clipboard\n"; } - - my $res = {}; - $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf); - - $res->{removable} = $res->{removable} & 128 ? 1 : 0; - $res->{type} &= 0x1F; - - return $res; -} - -sub path_is_scsi { - my ($path) = @_; - - my $fh = IO::File->new("+<$path") || return; - my $res = scsi_inquiry($fh, 1); - close($fh); - - return $res; } sub print_tabletdevice_full { @@ -1477,52 +1406,52 @@ sub print_drivedevice_full { my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); my $unit = $drive->{index} % $maxdev; - my $devicetype = 'hd'; - my $path = ''; - if (drive_is_cdrom($drive)) { - $devicetype = 'cd'; - } else { - if ($drive->{file} =~ m|^/|) { - $path = $drive->{file}; - if (my $info = path_is_scsi($path)) { - if ($info->{type} == 0 && $drive->{scsiblock}) { - $devicetype = 'block'; - } elsif ($info->{type} == 1) { # tape - $devicetype = 'generic'; - } - } - } else { - $path = PVE::Storage::path($storecfg, $drive->{file}); - } - # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380) - my $version = extract_version($machine_type, kvm_user_version()); - if ($path =~ m/^iscsi\:\/\// && - !min_version($version, 4, 1)) { - $devicetype = 'generic'; - } - } + my $machine_version = extract_version($machine_type, kvm_user_version()); + my $device_type = PVE::QemuServer::Drive::get_scsi_device_type( + $drive, $storecfg, $machine_version); if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') { - $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit"; + $device = "scsi-$device_type,bus=$controller_prefix$controller.0,scsi-id=$unit"; } else { - $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0" + $device = "scsi-$device_type,bus=$controller_prefix$controller.0,channel=0,scsi-id=0" .",lun=$drive->{index}"; } $device .= ",drive=drive-$drive_id,id=$drive_id"; - if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) { + if ($drive->{ssd} && ($device_type eq 'block' || $device_type eq 'hd')) { $device .= ",rotation_rate=1"; } $device .= ",wwn=$drive->{wwn}" if $drive->{wwn}; + # only scsi-hd and scsi-cd support passing vendor and product information + if ($device_type eq 'hd' || $device_type eq 'cd') { + if (my $vendor = $drive->{vendor}) { + $device .= ",vendor=$vendor"; + } + if (my $product = $drive->{product}) { + $device .= ",product=$product"; + } + } + } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') { my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2; my $controller = int($drive->{index} / $maxdev); my $unit = $drive->{index} % $maxdev; - my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd"; - $device = "ide-$devicetype"; + # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled + # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than + # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to + # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they + # were before. Move odd ones up by 2 where they don't clash. + if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') { + $controller += 2 * ($unit % 2); + $unit = 0; + } + + my $device_type = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd"; + + $device = "ide-$device_type"; if ($drive->{interface} eq 'ide') { $device .= ",bus=ide.$controller,unit=$unit"; } else { @@ -1530,7 +1459,7 @@ sub print_drivedevice_full { } $device .= ",drive=drive-$drive_id,id=$drive_id"; - if ($devicetype eq 'hd') { + if ($device_type eq 'hd') { if (my $model = $drive->{model}) { $model = URI::Escape::uri_unescape($model); $device .= ",model=$model"; @@ -1604,7 +1533,7 @@ my sub drive_uses_cache_direct { } sub print_drive_commandline_full { - my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_; + my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_; my $path; my $volid = $drive->{file}; @@ -1616,7 +1545,7 @@ sub print_drive_commandline_full { if (drive_is_cdrom($drive)) { $path = get_iso_path($storecfg, $vmid, $volid); - die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name; + die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name; } else { if ($storeid) { $path = PVE::Storage::path($storecfg, $volid); @@ -1667,7 +1596,7 @@ sub print_drive_commandline_full { } } - if ($pbs_name) { + if ($live_restore_name) { $format = "rbd" if $is_rbd; die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n" if !$format; @@ -1707,18 +1636,18 @@ sub print_drive_commandline_full { # note: 'detect-zeroes' works per blockdev and we want it to persist # after the alloc-track is removed, so put it on 'file' directly - my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes"; + my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes"; $opts .= ",$dz_param=$detectzeroes" if $detectzeroes; } - if ($pbs_name) { - $opts .= ",backing=$pbs_name"; + if ($live_restore_name) { + $opts .= ",backing=$live_restore_name"; $opts .= ",auto-remove=on"; } - # my $file_param = $pbs_name ? "file.file.filename" : "file"; + # my $file_param = $live_restore_name ? "file.file.filename" : "file"; my $file_param = "file"; - if ($pbs_name) { + if ($live_restore_name) { # non-rbd drivers require the underlying file to be a seperate block # node, so add a second .file indirection $file_param .= ".file" if !$is_rbd; @@ -1761,7 +1690,7 @@ sub print_netdevice_full { } if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio'){ - $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024"; + $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256"; } $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ; @@ -1820,7 +1749,7 @@ sub print_netdev_full { if length($ifname) >= 16; my $vhostparam = ''; - if (is_native($arch)) { + if (is_native_arch($arch)) { $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio'; } @@ -1870,7 +1799,7 @@ sub print_vga_device { } } - die "no devicetype for $vga->{type}\n" if !$type; + die "no device-type for $vga->{type}\n" if !$type; my $memory = ""; if ($vgamem_mb) { @@ -1917,29 +1846,6 @@ sub print_vga_device { return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}"; } -sub parse_number_sets { - my ($set) = @_; - my $res = []; - foreach my $part (split(/;/, $set)) { - if ($part =~ /^\s*(\d+)(?:-(\d+))?\s*$/) { - die "invalid range: $part ($2 < $1)\n" if defined($2) && $2 < $1; - push @$res, [ $1, $2 ]; - } else { - die "invalid range: $part\n"; - } - } - return $res; -} - -sub parse_numa { - my ($data) = @_; - - my $res = parse_property_string($numa_fmt, $data); - $res->{cpus} = parse_number_sets($res->{cpus}) if defined($res->{cpus}); - $res->{hostnodes} = parse_number_sets($res->{hostnodes}) if defined($res->{hostnodes}); - return $res; -} - # netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate= sub parse_net { my ($data, $disable_mac_autogen) = @_; @@ -2191,8 +2097,9 @@ sub qemu_created_version_fixups { # check if we need to apply some handling for VMs that always use the latest machine version but # had a machine version transition happen that affected HW such that, e.g., an OS config change # would be required (we do not want to pin machine version for non-windows OS type) + my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine}); if ( - (!defined($conf->{machine}) || $conf->{machine} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine + (!defined($machine_conf->{type}) || $machine_conf->{type} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine && (!defined($meta->{'creation-qemu'}) || !min_version($meta->{'creation-qemu'}, 6, 1)) # created before 6.1 && (!$forced_vers || min_version($forced_vers, 6, 1)) # handle snapshot-rollback/migrations && min_version($kvmver, 6, 1) # only need to apply the change since 6.1 @@ -2383,6 +2290,9 @@ sub destroy_vm { }); } + eval { delete_ifaces_ipams_ips($conf, $vmid)}; + warn $@ if $@; + if (defined $replacement_conf) { PVE::QemuConfig->write_config($vmid, $replacement_conf); } else { @@ -2683,7 +2593,7 @@ sub check_local_resources { foreach my $k (keys %$conf) { if ($k =~ m/^usb/) { my $entry = parse_property_string('pve-qm-usb', $conf->{$k}); - next if $entry->{host} =~ m/^spice$/i; + next if $entry->{host} && $entry->{host} =~ m/^spice$/i; if ($entry->{mapping}) { $add_missing_mapping->('usb', $k, $entry->{mapping}); push @$mapped_res, $k; @@ -2947,8 +2857,7 @@ sub vmstatus { $d->{cpus} = $conf->{vcpus} if $conf->{vcpus}; $d->{name} = $conf->{name} || "VM $vmid"; - $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) - : $defaults->{memory}*(1024*1024); + $d->{maxmem} = get_current_memory($conf->{memory})*(1024*1024); if ($conf->{balloon}) { $d->{balloon_min} = $conf->{balloon}*(1024*1024); @@ -3304,11 +3213,6 @@ sub vga_conf_has_spice { return $1 || 1; } -sub is_native($) { - my ($arch) = @_; - return get_host_arch() eq $arch; -} - sub get_vm_arch { my ($conf) = @_; return $conf->{arch} // get_host_arch(); @@ -3351,7 +3255,8 @@ sub windows_get_pinned_machine_version { sub get_vm_machine { my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_; - my $machine = $forcemachine || $conf->{machine}; + my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine}); + my $machine = $forcemachine || $machine_conf->{type}; if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) { $kvmversion //= kvm_user_version(); @@ -3390,9 +3295,13 @@ sub get_ovmf_files($$$) { or die "no OVMF images known for architecture '$arch'\n"; my $type = 'default'; - if ($arch ne "aarch64" && defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') { - $type = $smm ? "4m" : "4m-no-smm"; - $type .= '-ms' if $efidisk->{'pre-enrolled-keys'}; + if ($arch eq 'x86_64') { + if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') { + $type = $smm ? "4m" : "4m-no-smm"; + $type .= '-ms' if $efidisk->{'pre-enrolled-keys'}; + } else { + # TODO: log_warn about use of legacy images for x86_64 with Promxox VE 9 + } } my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*; @@ -3408,7 +3317,7 @@ my $Arch2Qemu = { }; sub get_command_for_arch($) { my ($arch) = @_; - return '/usr/bin/kvm' if is_native($arch); + return '/usr/bin/kvm' if is_native_arch($arch); my $cmd = $Arch2Qemu->{$arch} or die "don't know how to emulate architecture '$arch'\n"; @@ -3586,7 +3495,7 @@ my sub print_ovmf_drive_commandlines { sub config_to_command { my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu, - $pbs_backing) = @_; + $live_restore_backing) = @_; my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []); my $devices = []; @@ -3596,6 +3505,8 @@ sub config_to_command { my $kvm = $conf->{kvm}; my $nodename = nodename(); + my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine}); + my $arch = get_vm_arch($conf); my $kvm_binary = get_command_for_arch($arch); my $kvmver = kvm_user_version($kvm_binary); @@ -3609,7 +3520,7 @@ sub config_to_command { my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version); my $machine_version = extract_version($machine_type, $kvmver); - $kvm //= 1 if is_native($arch); + $kvm //= 1 if is_native_arch($arch); $machine_version =~ m/(\d+)\.(\d+)/; my ($machine_major, $machine_minor) = ($1, $2); @@ -3703,6 +3614,9 @@ sub config_to_command { } if ($conf->{bios} && $conf->{bios} eq 'ovmf') { + die "OVMF (UEFI) BIOS is not supported on 32-bit CPU types\n" + if !$forcecpu && get_cpu_bitness($conf->{cpu}, $arch) == 32; + my ($code_drive_str, $var_drive_str) = print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard); push $cmd->@*, '-drive', $code_drive_str; @@ -3810,7 +3724,9 @@ sub config_to_command { push @$devices, @$audio_devs; } - add_tpm_device($vmid, $devices, $conf); + # Add a TPM only if the VM is not a template, + # to support backing up template VMs even if the TPM disk is write-protected. + add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf)); my $sockets = 1; $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused @@ -3829,7 +3745,7 @@ sub config_to_command { if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) { push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus"; for (my $i = 2; $i <= $vcpus; $i++) { - my $cpustr = print_cpu_device($conf,$i); + my $cpustr = print_cpu_device($conf, $arch, $i); push @$cmd, '-device', $cpustr; } @@ -3891,7 +3807,7 @@ sub config_to_command { } PVE::QemuServer::Memory::config( - $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd); + $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd); push @$cmd, '-S' if $conf->{freeze}; @@ -3930,7 +3846,10 @@ sub config_to_command { my $spice_port; - if ($qxlnum || $vga->{type} =~ /^virtio/) { + assert_clipboard_config($vga); + my $is_spice = $qxlnum || $vga->{type} =~ /^virtio/; + + if ($is_spice || ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc')) { if ($qxlnum > 1) { if ($winversion){ for (my $i = 1; $i < $qxlnum; $i++){ @@ -3951,29 +3870,34 @@ sub config_to_command { my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type); - my $pfamily = PVE::Tools::get_host_address_family($nodename); - my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily); - die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs; - push @$devices, '-device', "virtio-serial,id=spice$pciaddr"; - push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent"; + if ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc') { + push @$devices, '-chardev', 'qemu-vdagent,id=vdagent,name=vdagent,clipboard=on'; + } else { + push @$devices, '-chardev', 'spicevmc,id=vdagent,name=vdagent'; + } push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0"; - my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr}); - $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost); + if ($is_spice) { + my $pfamily = PVE::Tools::get_host_address_family($nodename); + my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily); + die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs; - my $spice_enhancement_str = $conf->{spice_enhancements} // ''; - my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str); - if ($spice_enhancement->{foldersharing}) { - push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0"; - push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0"; - } + my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr}); + $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost); - my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on"; - $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}" - if $spice_enhancement->{videostreaming}; + my $spice_enhancement_str = $conf->{spice_enhancements} // ''; + my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str); + if ($spice_enhancement->{foldersharing}) { + push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0"; + push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0"; + } - push @$devices, '-spice', "$spice_opts"; + my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on"; + $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}" + if $spice_enhancement->{videostreaming}; + push @$devices, '-spice', "$spice_opts"; + } } # enable balloon by default, unless explicitly disabled @@ -4061,15 +3985,15 @@ sub config_to_command { $ahcicontroller->{$controller}=1; } - my $pbs_conf = $pbs_backing->{$ds}; - my $pbs_name = undef; - if ($pbs_conf) { - $pbs_name = "drive-$ds-pbs"; - push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name); + my $live_restore = $live_restore_backing->{$ds}; + my $live_blockdev_name = undef; + if ($live_restore) { + $live_blockdev_name = $live_restore->{name}; + push @$devices, '-blockdev', $live_restore->{blockdev}; } my $drive_cmd = print_drive_commandline_full( - $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0)); + $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0)); # extra protection for templates, but SATA and IDE don't support it.. $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive); @@ -4159,6 +4083,17 @@ sub config_to_command { } push @$machineFlags, "type=${machine_type_min}"; + PVE::QemuServer::Machine::assert_valid_machine_property($conf, $machine_conf); + + if (my $viommu = $machine_conf->{viommu}) { + if ($viommu eq 'intel') { + unshift @$devices, '-device', 'intel-iommu,intremap=on,caching-mode=on'; + push @$machineFlags, 'kernel-irqchip=split'; + } elsif ($viommu eq 'virtio') { + push @$devices, '-device', 'virtio-iommu-pci'; + } + } + push @$cmd, @$devices; push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags); @@ -4420,21 +4355,6 @@ sub qemu_spice_usbredir_chardev_add { )); } -sub qemu_deviceadd { - my ($vmid, $devicefull) = @_; - - $devicefull = "driver=".$devicefull; - my %options = split(/[=,]/, $devicefull); - - mon_cmd($vmid, "device_add" , %options); -} - -sub qemu_devicedel { - my ($vmid, $deviceid) = @_; - - my $ret = mon_cmd($vmid, "device_del", id => $deviceid); -} - sub qemu_iothread_add { my ($vmid, $deviceid, $device) = @_; @@ -4453,22 +4373,6 @@ sub qemu_iothread_del { } } -sub qemu_objectadd { - my ($vmid, $objectid, $qomtype) = @_; - - mon_cmd($vmid, "object-add", id => $objectid, "qom-type" => $qomtype); - - return 1; -} - -sub qemu_objectdel { - my ($vmid, $objectid) = @_; - - mon_cmd($vmid, "object-del", id => $objectid); - - return 1; -} - sub qemu_driveadd { my ($storecfg, $vmid, $device) = @_; @@ -4694,9 +4598,10 @@ sub qemu_cpu_hotplug { if scalar(@{$currentrunningvcpus}) != $currentvcpus; if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) { + my $arch = get_vm_arch($conf); for (my $i = $currentvcpus+1; $i <= $vcpus; $i++) { - my $cpustr = print_cpu_device($conf, $i); + my $cpustr = print_cpu_device($conf, $arch, $i); qemu_deviceadd($vmid, $cpustr); my $retry = 0; @@ -4786,24 +4691,29 @@ sub qemu_volume_snapshot { } sub qemu_volume_snapshot_delete { - my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_; + my ($vmid, $storecfg, $volid, $snap) = @_; my $running = check_running($vmid); + my $attached_deviceid; - if($running) { - - $running = undef; + if ($running) { my $conf = PVE::QemuConfig->load_config($vmid); PVE::QemuConfig->foreach_volume($conf, sub { my ($ds, $drive) = @_; - $running = 1 if $drive->{file} eq $volid; + $attached_deviceid = "drive-$ds" if $drive->{file} eq $volid; }); } - if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) { - mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap); + if ($attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid)) { + mon_cmd( + $vmid, + 'blockdev-snapshot-delete-internal-sync', + device => $attached_deviceid, + name => $snap, + ); } else { - PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running); + PVE::Storage::volume_snapshot_delete( + $storecfg, $volid, $snap, $attached_deviceid ? 1 : undef); } } @@ -4844,7 +4754,7 @@ sub foreach_volid { my $volhash = {}; my $test_volid = sub { - my ($key, $drive, $snapname) = @_; + my ($key, $drive, $snapname, $pending) = @_; my $volid = $drive->{file}; return if !$volid; @@ -4859,12 +4769,18 @@ sub foreach_volid { $volhash->{$volid}->{shared} //= 0; $volhash->{$volid}->{shared} = 1 if $drive->{shared}; - $volhash->{$volid}->{referenced_in_config} //= 0; - $volhash->{$volid}->{referenced_in_config} = 1 if !defined($snapname); + $volhash->{$volid}->{is_unused} //= 0; + $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/; + + $volhash->{$volid}->{is_attached} //= 0; + $volhash->{$volid}->{is_attached} = 1 + if !$volhash->{$volid}->{is_unused} && !defined($snapname) && !$pending; $volhash->{$volid}->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname); + $volhash->{$volid}->{referenced_in_pending} = 1 if $pending; + my $size = $drive->{size}; $volhash->{$volid}->{size} //= $size if $size; @@ -4874,9 +4790,6 @@ sub foreach_volid { $volhash->{$volid}->{is_tpmstate} //= 0; $volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0'; - $volhash->{$volid}->{is_unused} //= 0; - $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/; - $volhash->{$volid}->{drivename} = $key if is_valid_drivename($key); }; @@ -4886,6 +4799,10 @@ sub foreach_volid { }; PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid); + + PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $test_volid, undef, 1) + if defined($conf->{pending}) && $conf->{pending}->%*; + foreach my $snapname (keys %{$conf->{snapshots}}) { my $snap = $conf->{snapshots}->{$snapname}; PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname); @@ -5030,7 +4947,7 @@ sub vmconfig_hotplug_pending { my $force = $pending_delete_hash->{$opt}->{force}; eval { if ($opt eq 'hotplug') { - die "skip\n" if ($conf->{hotplug} =~ /memory/); + die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/); } elsif ($opt eq 'tablet') { die "skip\n" if !$hotplug_features->{usb}; if ($defaults->{tablet}) { @@ -5053,20 +4970,24 @@ sub vmconfig_hotplug_pending { # enable balloon device is not hotpluggable die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0; # here we reset the ballooning value to memory - my $balloon = $conf->{memory} || $defaults->{memory}; + my $balloon = get_current_memory($conf->{memory}); mon_cmd($vmid, "balloon", value => $balloon*1024*1024); } elsif ($fast_plug_option->{$opt}) { # do nothing } elsif ($opt =~ m/^net(\d+)$/) { die "skip\n" if !$hotplug_features->{network}; vm_deviceunplug($vmid, $conf, $opt); + if($have_sdn) { + my $net = PVE::QemuServer::parse_net($conf->{$opt}); + PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}); + } } elsif (is_valid_drivename($opt)) { die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/; vm_deviceunplug($vmid, $conf, $opt); vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); } elsif ($opt =~ m/^memory$/) { die "skip\n" if !$hotplug_features->{memory}; - PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults); + PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf); } elsif ($opt eq 'cpuunits') { $cgroup->change_cpu_shares(undef); } elsif ($opt eq 'cpulimit') { @@ -5091,6 +5012,7 @@ sub vmconfig_hotplug_pending { eval { if ($opt eq 'hotplug') { die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/); + die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/); } elsif ($opt eq 'tablet') { die "skip\n" if !$hotplug_features->{usb}; if ($value == 1) { @@ -5121,7 +5043,8 @@ sub vmconfig_hotplug_pending { # allow manual ballooning if shares is set to zero if ((defined($conf->{shares}) && ($conf->{shares} == 0))) { - my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory}; + my $memory = get_current_memory($conf->{memory}); + my $balloon = $conf->{pending}->{balloon} || $memory; mon_cmd($vmid, "balloon", value => $balloon*1024*1024); } } elsif ($opt =~ m/^net(\d+)$/) { @@ -5141,7 +5064,7 @@ sub vmconfig_hotplug_pending { $vmid, $opt, $value, $arch, $machine_type); } elsif ($opt =~ m/^memory$/) { #dimms die "skip\n" if !$hotplug_features->{memory}; - $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value); + $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value); } elsif ($opt eq 'cpuunits') { my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp $cgroup->change_cpu_shares($new_cpuunits); @@ -5263,6 +5186,12 @@ sub vmconfig_apply_pending { die "internal error"; } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) { vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); + } elsif (defined($conf->{$opt}) && $opt =~ m/^net\d+$/) { + if($have_sdn) { + my $net = PVE::QemuServer::parse_net($conf->{$opt}); + eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) }; + warn if $@; + } } }; if (my $err = $@) { @@ -5282,6 +5211,22 @@ sub vmconfig_apply_pending { eval { if (defined($conf->{$opt}) && is_valid_drivename($opt)) { vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt})) + } elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/) { + return if !$have_sdn; # return from eval if SDN is not available + + my $new_net = PVE::QemuServer::parse_net($conf->{pending}->{$opt}); + if ($conf->{$opt}) { + my $old_net = PVE::QemuServer::parse_net($conf->{$opt}); + + if (defined($old_net->{bridge}) && defined($old_net->{macaddr}) && ( + safe_string_ne($old_net->{bridge}, $new_net->{bridge}) || + safe_string_ne($old_net->{macaddr}, $new_net->{macaddr}) + )) { + PVE::Network::SDN::Vnets::del_ips_from_mac($old_net->{bridge}, $old_net->{macaddr}, $conf->{name}); + } + } + #fixme: reuse ip if mac change && same bridge + PVE::Network::SDN::Vnets::add_next_free_cidr($new_net->{bridge}, $conf->{name}, $new_net->{macaddr}, $vmid, undef, 1); } }; if (my $err = $@) { @@ -5320,11 +5265,17 @@ sub vmconfig_update_net { safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) || safe_num_ne($oldnet->{queues}, $newnet->{queues}) || safe_num_ne($oldnet->{mtu}, $newnet->{mtu}) || - !($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change + !($newnet->{bridge} && $oldnet->{bridge}) + ) { # bridge/nat mode change # for non online change, we try to hot-unplug die "skip\n" if !$hotplug; vm_deviceunplug($vmid, $conf, $opt); + + if ($have_sdn) { + PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name}); + } + } else { die "internal error" if $opt !~ m/net(\d+)/; @@ -5333,14 +5284,37 @@ sub vmconfig_update_net { if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) || safe_num_ne($oldnet->{tag}, $newnet->{tag}) || safe_string_ne($oldnet->{trunks}, $newnet->{trunks}) || - safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) { + safe_num_ne($oldnet->{firewall}, $newnet->{firewall}) + ) { PVE::Network::tap_unplug($iface); + #set link_down in guest if bridge or vlan change to notify guest (dhcp renew for example) + if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) || + safe_num_ne($oldnet->{tag}, $newnet->{tag}) + ) { + qemu_set_link_status($vmid, $opt, 0); + } + + if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge})) { + if ($have_sdn) { + PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name}); + PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1); + } + } + if ($have_sdn) { PVE::Network::SDN::Zones::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate}); } else { PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate}); } + + #set link_up in guest if bridge or vlan change to notify guest (dhcp renew for example) + if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) || + safe_num_ne($oldnet->{tag}, $newnet->{tag}) + ) { + qemu_set_link_status($vmid, $opt, 1); + } + } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) { # Rate can be applied on its own but any change above needs to # include the rate in tap_plug since OVS resets everything. @@ -5356,6 +5330,10 @@ sub vmconfig_update_net { } if ($hotplug) { + if ($have_sdn) { + PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1); + PVE::Network::SDN::Vnets::add_dhcp_mapping($newnet->{bridge}, $newnet->{macaddr}, $vmid, $conf->{name}); + } vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type); } else { die "skip\n"; @@ -5413,8 +5391,10 @@ sub vmconfig_update_disk { safe_string_ne($drive->{discard}, $old_drive->{discard}) || safe_string_ne($drive->{iothread}, $old_drive->{iothread}) || safe_string_ne($drive->{queues}, $old_drive->{queues}) || + safe_string_ne($drive->{product}, $old_drive->{product}) || safe_string_ne($drive->{cache}, $old_drive->{cache}) || safe_string_ne($drive->{ssd}, $old_drive->{ssd}) || + safe_string_ne($drive->{vendor}, $old_drive->{vendor}) || safe_string_ne($drive->{ro}, $old_drive->{ro})) { die "skip\n"; } @@ -5546,9 +5526,11 @@ sub vm_migrate_get_nbd_disks { my $scfg = PVE::Storage::storage_config($storecfg, $storeid); return if $scfg->{shared}; + my $format = qemu_img_format($scfg, $volname); + # replicated disks re-use existing state via bitmap my $use_existing = $replicated_volumes->{$volid} ? 1 : 0; - $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing]; + $local_volumes->{$ds} = [$volid, $storeid, $drive, $use_existing, $format]; }); return $local_volumes; } @@ -5559,7 +5541,7 @@ sub vm_migrate_alloc_nbd_disks { my $nbd = {}; foreach my $opt (sort keys %$source_volumes) { - my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}}; + my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}}; if ($use_existing) { $nbd->{$opt}->{drivestr} = print_drive($drive); @@ -5568,29 +5550,13 @@ sub vm_migrate_alloc_nbd_disks { next; } - # storage mapping + volname = regular migration - # storage mapping + format = remote migration + $storeid = PVE::JSONSchema::map_id($storagemap, $storeid); + # order of precedence, filtered by whether storage supports it: # 1. explicit requested format - # 2. format of current volume - # 3. default format of storage - if (!$storagemap->{identity}) { - $storeid = PVE::JSONSchema::map_id($storagemap, $storeid); - my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); - if (!$format || !grep { $format eq $_ } @$validFormats) { - if ($volname) { - my $scfg = PVE::Storage::storage_config($storecfg, $storeid); - my $fileFormat = qemu_img_format($scfg, $volname); - $format = $fileFormat - if grep { $fileFormat eq $_ } @$validFormats; - } - $format //= $defFormat; - } - } else { - # can't happen for remote migration, so $volname is always defined - my $scfg = PVE::Storage::storage_config($storecfg, $storeid); - $format = qemu_img_format($scfg, $volname); - } + # 2. default format of storage + my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); + $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*; my $size = $drive->{size} / 1024; my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size); @@ -5659,12 +5625,10 @@ sub vm_start { # timeout => in seconds # paused => start VM in paused state (backup) # resume => resume from hibernation -# pbs-backing => { +# live-restore-backing => { # sata0 => { -# repository -# snapshot -# keyfile -# archive +# name => blockdev-name, +# blockdev => "arg to the -blockdev command instantiating device named 'name'", # }, # virtio2 => ... # } @@ -5738,7 +5702,7 @@ sub vm_start_nolock { } my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid, - $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'}); + $conf, $defaults, $forcemachine, $forcecpu, $params->{'live-restore-backing'}); my $migration_ip; my $get_migration_ip = sub { @@ -5791,10 +5755,9 @@ sub vm_start_nolock { $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr}); } - my $pfamily = PVE::Tools::get_host_address_family($nodename); - $migrate->{port} = PVE::Tools::next_migrate_port($pfamily); - $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}"; - push @$cmd, '-incoming', $migrate->{uri}; + # see #4501: port reservation should be done close to usage - tell QEMU where to listen + # via QMP later + push @$cmd, '-incoming', 'defer'; push @$cmd, '-S'; } elsif ($statefile eq 'unix') { @@ -5820,7 +5783,8 @@ sub vm_start_nolock { push @$cmd, '-S'; } - my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume); + my $memory = get_current_memory($conf->{memory}); + my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume); my $pci_reserve_list = []; for my $device (values $pci_devices->%*) { @@ -5871,9 +5835,10 @@ sub vm_start_nolock { PVE::Storage::activate_volumes($storecfg, $vollist); - eval { - run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{}); - }; + + my %silence_std_outs = (outfunc => sub {}, errfunc => sub {}); + eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) }; + eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) }; # Issues with the above 'stop' not being fully completed are extremely rare, a very low # timeout should be more than enough here... PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20); @@ -5916,7 +5881,7 @@ sub vm_start_nolock { PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties); my $tpmpid; - if (my $tpm = $conf->{tpmstate0}) { + if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) { # start the TPM emulator so QEMU can connect on start $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom); } @@ -5976,8 +5941,15 @@ sub vm_start_nolock { eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) }; warn $@ if $@; - if (defined($res->{migrate})) { - print "migration listens on $res->{migrate}->{uri}\n"; + if (defined(my $migrate = $res->{migrate})) { + if ($migrate->{proto} eq 'tcp') { + my $nodename = nodename(); + my $pfamily = PVE::Tools::get_host_address_family($nodename); + $migrate->{port} = PVE::Tools::next_migrate_port($pfamily); + $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}"; + mon_cmd($vmid, "migrate-incoming", uri => $migrate->{uri}); + } + print "migration listens on $migrate->{uri}\n"; } elsif ($statefile) { eval { mon_cmd($vmid, "cont"); }; warn $@ if $@; @@ -6093,6 +6065,15 @@ sub vm_start_nolock { PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start'); + my ($current_machine, $is_deprecated) = + PVE::QemuServer::Machine::get_current_qemu_machine($vmid); + if ($is_deprecated) { + log_warn( + "current machine version '$current_machine' is deprecated - see the documentation and ". + "change to a newer one", + ); + } + return $res; } @@ -6167,12 +6148,20 @@ sub cleanup_pci_devices { my $dev_sysfs_dir = "/sys/bus/mdev/devices/$uuid"; # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error - # out when we do it first. so wait for 10 seconds and then try it - if ($d->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/) { - sleep 10; + # out when we do it first. so wait for up to 10 seconds and then try it manually + if ($d->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/ && -e $dev_sysfs_dir) { + my $count = 0; + while (-e $dev_sysfs_dir && $count < 10) { + sleep 1; + $count++; + } + print "waited $count seconds for mediated device driver finishing clean up\n"; } - PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1") if -e $dev_sysfs_dir; + if (-e $dev_sysfs_dir) { + print "actively clean up mediated device with UUID $uuid\n"; + PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1"); + } } } PVE::QemuServer::PCI::remove_pci_reservation($vmid); @@ -6417,7 +6406,8 @@ sub vm_suspend { if ($err) { # cleanup, but leave suspending lock, to indicate something went wrong eval { - mon_cmd($vmid, "savevm-end"); + eval { mon_cmd($vmid, "savevm-end"); }; + warn $@ if $@; PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); PVE::Storage::vdisk_free($storecfg, $vmstate); delete $conf->@{qw(vmstate runningmachine runningcpu)}; @@ -7233,20 +7223,27 @@ sub pbs_live_restore { print "starting VM for live-restore\n"; print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n"; - my $pbs_backing = {}; + my $live_restore_backing = {}; for my $ds (keys %$restored_disks) { $ds =~ m/^drive-(.*)$/; my $confname = $1; - $pbs_backing->{$confname} = { + my $pbs_conf = {}; + $pbs_conf = { repository => $opts->{repo}, snapshot => $opts->{snapshot}, archive => "$ds.img.fidx", }; - $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile}; - $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace}); + $pbs_conf->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile}; + $pbs_conf->{namespace} = $opts->{namespace} if defined($opts->{namespace}); my $drive = parse_drive($confname, $conf->{$confname}); print "restoring '$ds' to '$drive->{file}'\n"; + + my $pbs_name = "drive-${confname}-pbs"; + $live_restore_backing->{$confname} = { + name => $pbs_name, + blockdev => print_pbs_blockdev($pbs_conf, $pbs_name), + }; } my $drives_streamed = 0; @@ -7258,7 +7255,7 @@ sub pbs_live_restore { # start VM with backing chain pointing to PBS backup, environment vars for PBS driver # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller - vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {}); + vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'live-restore-backing' => $live_restore_backing}, {}); my $qmeventd_fd = register_qmeventd_handle($vmid); @@ -7298,6 +7295,93 @@ sub pbs_live_restore { } } +# Inspired by pbs live-restore, this restores with the disks being available as files. +# Theoretically this can also be used to quick-start a full-clone vm if the +# disks are all available as files. +# +# The mapping should provide a path by config entry, such as +# `{ scsi0 => { format => , path => "/path/to/file", sata1 => ... } }` +# +# This is used when doing a `create` call with the `--live-import` parameter, +# where the disks get an `import-from=` property. The non-live part is +# therefore already handled in the `$create_disks()` call happening in the +# `create` api call +sub live_import_from_files { + my ($mapping, $vmid, $conf, $restore_options) = @_; + + my $live_restore_backing = {}; + for my $dev (keys %$mapping) { + die "disk not support for live-restoring: '$dev'\n" + if !is_valid_drivename($dev) || $dev =~ /^(?:efidisk|tpmstate)/; + + die "mapping contains disk '$dev' which does not exist in the config\n" + if !exists($conf->{$dev}); + + my $info = $mapping->{$dev}; + my ($format, $path) = $info->@{qw(format path)}; + die "missing path for '$dev' mapping\n" if !$path; + die "missing format for '$dev' mapping\n" if !$format; + die "invalid format '$format' for '$dev' mapping\n" + if !grep { $format eq $_ } qw(raw qcow2 vmdk); + + $live_restore_backing->{$dev} = { + name => "drive-$dev-restore", + blockdev => "driver=$format,node-name=drive-$dev-restore" + . ",read-only=on" + . ",file.driver=file,file.filename=$path" + }; + }; + + my $storecfg = PVE::Storage::config(); + eval { + + # make sure HA doesn't interrupt our restore by stopping the VM + if (PVE::HA::Config::vm_is_ha_managed($vmid)) { + run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']); + } + + vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'live-restore-backing' => $live_restore_backing}, {}); + + # prevent shutdowns from qmeventd when the VM powers off from the inside + my $qmeventd_fd = register_qmeventd_handle($vmid); + + # begin streaming, i.e. data copy from PBS to target disk for every vol, + # this will effectively collapse the backing image chain consisting of + # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track + # removes itself once all backing images vanish with 'auto-remove=on') + my $jobs = {}; + for my $ds (sort keys %$live_restore_backing) { + my $job_id = "restore-$ds"; + mon_cmd($vmid, 'block-stream', + 'job-id' => $job_id, + device => "drive-$ds", + ); + $jobs->{$job_id} = {}; + } + + mon_cmd($vmid, 'cont'); + qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream'); + + print "restore-drive jobs finished successfully, removing all tracking block devices\n"; + + for my $ds (sort keys %$live_restore_backing) { + mon_cmd($vmid, 'blockdev-del', 'node-name' => "drive-$ds-restore"); + } + + close($qmeventd_fd); + }; + + my $err = $@; + + if ($err) { + warn "An error occurred during live-restore: $err\n"; + _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1); + die "live-restore failed\n"; + } + + PVE::QemuConfig->remove_lock($vmid, "import"); +} + sub restore_vma_archive { my ($archive, $vmid, $user, $opts, $comp) = @_; @@ -7357,9 +7441,6 @@ sub restore_vma_archive { $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]); - my $oldtimeout; - my $timeout = 5; - my $devinfo = {}; # info about drives included in backup my $virtdev_hash = {}; # info about allocated drives @@ -7453,6 +7534,8 @@ sub restore_vma_archive { $fh->close(); }; + my $oldtimeout; + eval { # enable interrupts local $SIG{INT} = @@ -7462,7 +7545,7 @@ sub restore_vma_archive { local $SIG{PIPE} = sub { die "interrupted by signal\n"; }; local $SIG{ALRM} = sub { die "got timeout\n"; }; - $oldtimeout = alarm($timeout); + $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one my $parser = sub { my $line = shift; @@ -7474,14 +7557,11 @@ sub restore_vma_archive { $devinfo->{$devname} = { size => $size, dev_id => $dev_id }; } elsif ($line =~ m/^CTIME: /) { # we correctly received the vma config, so we can disable - # the timeout now for disk allocation (set to 10 minutes, so - # that we always timeout if something goes wrong) - alarm(600); + # the timeout now for disk allocation + alarm($oldtimeout || 0); + $oldtimeout = undef; &$print_devmap(); print $fifofh "done\n"; - my $tmp = $oldtimeout || 0; - $oldtimeout = undef; - alarm($tmp); close($fifofh); $fifofh = undef; } @@ -7806,7 +7886,11 @@ sub qemu_img_convert { sub qemu_img_format { my ($scfg, $volname) = @_; - if ($scfg->{path} && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) { + # FIXME: this entire function is kind of weird given that `parse_volname` + # also already gives us a format? + my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi'; + + if ($is_path_storage && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) { return $1; } else { return "raw"; @@ -8074,7 +8158,8 @@ sub clone_disk { my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)}; my ($storage, $format) = $dest->@{qw(storage format)}; - my $use_drive_mirror = $full && $running && $src_drivename && !$snapname; + my $unused = defined($src_drivename) && $src_drivename =~ /^unused/; + my $use_drive_mirror = $full && $running && $src_drivename && !$snapname && !$unused; if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) { die "cloning from/to EFI disk requires EFI disk\n" @@ -8180,7 +8265,7 @@ no_data_clone: my $disk = dclone($drive); delete $disk->{format}; $disk->{file} = $newvolid; - $disk->{size} = $size if defined($size); + $disk->{size} = $size if defined($size) && !$unused; return $disk; } @@ -8351,7 +8436,7 @@ sub generate_smbios1_uuid { sub nbd_stop { my ($vmid) = @_; - mon_cmd($vmid, 'nbd-server-stop'); + mon_cmd($vmid, 'nbd-server-stop', timeout => 25); } sub create_reboot_request { @@ -8581,13 +8666,17 @@ sub complete_migration_storage { } sub vm_is_paused { - my ($vmid) = @_; + my ($vmid, $include_suspended) = @_; my $qmpstatus = eval { PVE::QemuConfig::assert_config_exists_on_node($vmid); mon_cmd($vmid, "query-status"); }; warn "$@\n" if $@; - return $qmpstatus && $qmpstatus->{status} eq "paused"; + return $qmpstatus && ( + $qmpstatus->{status} eq "paused" || + $qmpstatus->{status} eq "prelaunch" || + ($include_suspended && $qmpstatus->{status} eq "suspended") + ); } sub check_volume_storage_type { @@ -8625,9 +8714,9 @@ sub add_nets_bridge_fdb { next; } if ($have_sdn) { - PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall}); + PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge); } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now - PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall}); + PVE::Network::add_bridge_fdb($iface, $mac); } } } @@ -8644,9 +8733,38 @@ sub del_nets_bridge_fdb { my $bridge = $net->{bridge}; if ($have_sdn) { - PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall}); + PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge); } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now - PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall}); + PVE::Network::del_bridge_fdb($iface, $mac); + } + } +} + +sub create_ifaces_ipams_ips { + my ($conf, $vmid) = @_; + + return if !$have_sdn; + + foreach my $opt (keys %$conf) { + if ($opt =~ m/^net(\d+)$/) { + my $value = $conf->{$opt}; + my $net = PVE::QemuServer::parse_net($value); + eval { PVE::Network::SDN::Vnets::add_next_free_cidr($net->{bridge}, $conf->{name}, $net->{macaddr}, $vmid, undef, 1) }; + warn $@ if $@; + } + } +} + +sub delete_ifaces_ipams_ips { + my ($conf, $vmid) = @_; + + return if !$have_sdn; + + foreach my $opt (keys %$conf) { + if ($opt =~ m/^net(\d+)$/) { + my $net = PVE::QemuServer::parse_net($conf->{$opt}); + eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) }; + warn $@ if $@; } } }