X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=fd854c4e1a984897b79f7e062773ead17bb9b97a;hb=6dde5ea273c9a62f5a0924a2e99a27a9fdfc2292;hp=887fb9e6bad6fa4d20ee975f7cd775cbf6ecbf22;hpb=6f0cb675888fbdc6cb388faef9b48152c27b3eb2;p=qemu-server.git diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 887fb9e..fd854c4 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -2,6 +2,7 @@ package PVE::QemuServer; use strict; use warnings; + use POSIX; use IO::Handle; use IO::Select; @@ -30,10 +31,12 @@ use PVE::ProcFSTools; use PVE::QemuConfig; use PVE::QMPClient; use PVE::RPCEnvironment; +use PVE::GuestHelpers; use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr); use PVE::QemuServer::Memory; use PVE::QemuServer::USB qw(parse_usb_device); use PVE::QemuServer::Cloudinit; +use PVE::SysFSTools; use PVE::Systemd; use Time::HiRes qw(gettimeofday); use File::Copy qw(copy); @@ -75,12 +78,6 @@ PVE::JSONSchema::register_standard_option('pve-qm-stateuri', { optional => 1, }); -PVE::JSONSchema::register_standard_option('pve-snapshot-name', { - description => "The name of the snapshot.", - type => 'string', format => 'pve-configid', - maxLength => 40, -}); - PVE::JSONSchema::register_standard_option('pve-qm-image-format', { type => 'string', enum => [qw(raw cow qcow qed qcow2 vmdk cloop)], @@ -118,8 +115,6 @@ mkdir $var_run_tmpdir; my $lock_dir = "/var/lock/qemu-server"; mkdir $lock_dir; -my $pcisysfs = "/sys/bus/pci"; - my $cpu_vendor_list = { # Intel CPUs 486 => 'GenuineIntel', @@ -187,6 +182,13 @@ my $cpu_fmt = { optional => 1, default => 0 }, + 'hv-vendor-id' => { + type => 'string', + pattern => qr/[a-zA-Z0-9]{1,12}/, + format_description => 'vendor-id', + description => 'The Hyper-V vendor ID. Some drivers or programs inside Windows guests need a specific ID.', + optional => 1, + }, flags => { description => "List of additional CPU flags separated by ';'." . " Use '+FLAG' to enable, '-FLAG' to disable a flag." @@ -238,7 +240,7 @@ my $vga_fmt = { default => 'std', optional => 1, default_key => 1, - enum => [qw(cirrus qxl qxl2 qxl3 qxl4 serial0 serial1 serial2 serial3 std virtio vmware)], + enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)], }, memory => { description => "Sets the VGA memory (in MiB). Has no effect with serial display.", @@ -249,6 +251,21 @@ my $vga_fmt = { }, }; +my $ivshmem_fmt = { + size => { + type => 'integer', + minimum => 1, + description => "The size of the file in MB.", + }, + name => { + type => 'string', + pattern => '[a-zA-Z0-9\-]+', + optional => 1, + format_description => 'string', + description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.", + }, +}; + my $confdesc = { onboot => { optional => 1, @@ -278,7 +295,7 @@ my $confdesc = { optional => 1, type => 'string', description => "Lock/unlock the VM.", - enum => [qw(migrate backup snapshot rollback)], + enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)], }, cpulimit => { optional => 1, @@ -608,8 +625,45 @@ EODESCR default => "1 (autogenerated)", optional => 1, }, + hookscript => { + type => 'string', + format => 'pve-volume-id', + optional => 1, + description => "Script that will be executed during various steps in the vms lifetime.", + }, + ivshmem => { + type => 'string', + format => $ivshmem_fmt, + description => "Inter-VM shared memory. Useful for direct communication between VMs, or to the host.", + optional => 1, + } }; +my $cicustom_fmt = { + meta => { + type => 'string', + optional => 1, + description => 'Specify a custom file containing all meta data passed to the VM via cloud-init. This is provider specific meaning configdrive2 and nocloud differ.', + format => 'pve-volume-id', + format_description => 'volume', + }, + network => { + type => 'string', + optional => 1, + description => 'Specify a custom file containing all network data passed to the VM via cloud-init.', + format => 'pve-volume-id', + format_description => 'volume', + }, + user => { + type => 'string', + optional => 1, + description => 'Specify a custom file containing all user data passed to the VM via cloud-init.', + format => 'pve-volume-id', + format_description => 'volume', + }, +}; +PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt); + my $confdesc_cloudinit = { citype => { optional => 1, @@ -627,6 +681,12 @@ my $confdesc_cloudinit = { type => 'string', description => 'cloud-init: Password to assign the user. Using this is generally not recommended. Use ssh keys instead. Also note that older cloud-init versions do not support hashed passwords.', }, + cicustom => { + optional => 1, + type => 'string', + description => 'cloud-init: Specify custom files to replace the automatically generated ones at start.', + format => 'pve-qm-cicustom', + }, searchdomain => { optional => 1, type => 'string', @@ -737,13 +797,9 @@ The DHCP server assign addresses to the guest starting from 10.0.2.15. __EOD__ my $net_fmt = { - macaddr => { - type => 'string', - pattern => qr/[0-9a-f]{2}(?::[0-9a-f]{2}){5}/i, + macaddr => get_standard_option('mac-addr', { description => "MAC address. That address must be unique withing your network. This is automatically generated if not specified.", - format_description => "XX:XX:XX:XX:XX:XX", - optional => 1, - }, + }), model => { type => 'string', description => "Network Card Model. The 'virtio' model provides the best performance with very low CPU overhead. If your guest does not support this driver, it is usually best to use 'e1000'.", @@ -1039,6 +1095,16 @@ my %ssd_fmt = ( }, ); +my %wwn_fmt = ( + wwn => { + type => 'string', + pattern => qr/^(0x)[0-9a-fA-F]{16}/, + format_description => 'wwn', + description => "The drive's worldwide name, encoded as 16 bytes hex string, prefixed by '0x'.", + optional => 1, + }, +); + my $add_throttle_desc = sub { my ($key, $type, $what, $unit, $longunit, $minimum) = @_; my $d = { @@ -1087,6 +1153,7 @@ my $ide_fmt = { %drivedesc_base, %model_fmt, %ssd_fmt, + %wwn_fmt, }; PVE::JSONSchema::register_format("pve-qm-ide", $ide_fmt); @@ -1103,6 +1170,7 @@ my $scsi_fmt = { %queues_fmt, %scsiblock_fmt, %ssd_fmt, + %wwn_fmt, }; my $scsidesc = { optional => 1, @@ -1114,6 +1182,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); my $sata_fmt = { %drivedesc_base, %ssd_fmt, + %wwn_fmt, }; my $satadesc = { optional => 1, @@ -1140,6 +1209,7 @@ my $alldrive_fmt = { %queues_fmt, %scsiblock_fmt, %ssd_fmt, + %wwn_fmt, }; my $efidisk_fmt = { @@ -1203,8 +1273,7 @@ my $usbdesc = { }; PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc); -# NOTE: the match-groups of this regex are used in parse_hostpci -my $PCIRE = qr/([a-f0-9]{2}:[a-f0-9]{2})(?:\.([a-f0-9]))?/; +my $PCIRE = qr/[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/; my $hostpci_fmt = { host => { default_key => 1, @@ -1245,6 +1314,17 @@ EODESCR optional => 1, default => 0, }, + 'mdev' => { + type => 'string', + format_description => 'string', + pattern => '[^/\.:]+', + optional => 1, + description => <{interface} eq 'virtio') { - my $pciaddr = print_pci_addr("$drive->{interface}$drive->{index}", $bridges); + my $pciaddr = print_pci_addr("$drive->{interface}$drive->{index}", $bridges, $arch, $machine_type); $device = "virtio-blk-pci,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}$pciaddr"; $device .= ",iothread=iothread-$drive->{interface}$drive->{index}" if $drive->{iothread}; } elsif ($drive->{interface} eq 'scsi') { @@ -1746,6 +1841,7 @@ sub print_drivedevice_full { if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) { $device .= ",rotation_rate=1"; } + $device .= ",wwn=$drive->{wwn}" if $drive->{wwn}; } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') { my $maxdev = ($drive->{interface} eq 'sata') ? $MAX_SATA_DISKS : 2; @@ -1770,6 +1866,7 @@ sub print_drivedevice_full { $device .= ",rotation_rate=1"; } } + $device .= ",wwn=$drive->{wwn}" if $drive->{wwn}; } elsif ($drive->{interface} eq 'usb') { die "implement me"; # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 @@ -1896,7 +1993,7 @@ sub print_drive_full { } sub print_netdevice_full { - my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files) = @_; + my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_; my $bootorder = $conf->{boot} || $confdesc->{boot}->{default}; @@ -1905,7 +2002,7 @@ sub print_netdevice_full { $device = 'virtio-net-pci'; }; - my $pciaddr = print_pci_addr("$netid", $bridges); + my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type); my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid"; if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio'){ #Consider we have N queues, the number of vectors needed is 2*N + 2 (plus one config interrupt and control vq) @@ -1934,7 +2031,7 @@ sub print_netdevice_full { } sub print_netdev_full { - my ($vmid, $conf, $net, $netid, $hotplug) = @_; + my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_; my $i = ''; if ($netid =~ m/^net(\d+)$/) { @@ -1951,7 +2048,7 @@ sub print_netdev_full { my $vhostparam = ''; if (is_native($arch)) { - $vhostparam = ',vhost=on' if $kernel_has_vhost_net && $net->{model} eq 'virtio'; + $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio'; } my $vmname = $conf->{name} || "vm$vmid"; @@ -1998,9 +2095,12 @@ my $vga_map = { }; sub print_vga_device { - my ($conf, $vga, $id, $qxlnum, $bridges) = @_; + my ($conf, $vga, $arch, $machine, $id, $qxlnum, $bridges) = @_; my $type = $vga_map->{$vga->{type}}; + if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') { + $type = 'virtio-gpu'; + } my $vgamem_mb = $vga->{memory}; if ($qxlnum) { $type = $id ? 'qxl' : 'qxl-vga'; @@ -2031,9 +2131,9 @@ sub print_vga_device { if ($q35 && $vgaid eq 'vga') { # the first display uses pcie.0 bus on q35 machines - $pciaddr = print_pcie_addr($vgaid, $bridges); + $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine); } else { - $pciaddr = print_pci_addr($vgaid, $bridges); + $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine); } return "$type,id=${vgaid}${memory}${pciaddr}"; @@ -2086,16 +2186,12 @@ sub parse_hostpci { my @idlist = split(/;/, $res->{host}); delete $res->{host}; foreach my $id (@idlist) { - if ($id =~ /^$PCIRE$/) { - if (defined($2)) { - push @{$res->{pciid}}, { id => $1, function => $2 }; - } else { - my $pcidevices = lspci($1); - $res->{pciid} = $pcidevices->{$1}; - } - } else { - # should have been caught by parse_property_string already - die "failed to parse PCI id: $id\n"; + if ($id =~ m/\./) { # full id 00:00.1 + push @{$res->{pciid}}, { + id => $id, + }; + } else { # partial id 00:00 + $res->{pciid} = PVE::SysFSTools::lspci($id); } } return $res; @@ -2439,15 +2535,6 @@ sub check_type { } } -sub check_iommu_support{ - #fixme : need to check IOMMU support - #http://www.linux-kvm.org/page/How_to_assign_devices_with_VT-d_in_KVM - - my $iommu=1; - return $iommu; - -} - sub touch_config { my ($vmid) = @_; @@ -2764,6 +2851,8 @@ sub check_local_resources { $loc_res = 1 if $conf->{hostusb}; # old syntax $loc_res = 1 if $conf->{hostpci}; # old syntax + $loc_res = 1 if $conf->{ivshmem}; + foreach my $k (keys %$conf) { next if $k =~ m/^usb/ && ($conf->{$k} eq 'spice'); # sockets are safe: they will recreated be on the target side post-migrate @@ -2971,6 +3060,11 @@ our $vmstatus_return_properties = { type => 'number', optional => 1, }, + lock => { + description => "The current config lock, if any.", + type => 'string', + optional => 1, + } }; my $last_proc_pid_stat; @@ -3041,6 +3135,7 @@ sub vmstatus { $d->{template} = PVE::QemuConfig->is_template($conf); $d->{serial} = 1 if conf_has_serial($conf); + $d->{lock} = $conf->{lock} if $conf->{lock}; $res->{$vmid} = $d; } @@ -3317,18 +3412,20 @@ sub get_cpu_options { if ($arch eq 'aarch64') { $cpu = 'cortex-a57'; } + my $hv_vendor_id; if (my $cputype = $conf->{cpu}) { my $cpuconf = PVE::JSONSchema::parse_property_string($cpu_fmt, $cputype) or die "Cannot parse cpu description: $cputype\n"; $cpu = $cpuconf->{cputype}; $kvm_off = 1 if $cpuconf->{hidden}; + $hv_vendor_id = $cpuconf->{'hv-vendor-id'}; if (defined(my $flags = $cpuconf->{flags})) { push @$cpuFlags, split(";", $flags); } } - push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64'; + push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64' && $arch eq 'x86_64'; push @$cpuFlags , '-x2apic' if $conf->{ostype} && $conf->{ostype} eq 'solaris'; @@ -3337,15 +3434,15 @@ sub get_cpu_options { push @$cpuFlags, '-rdtscp' if $cpu =~ m/^Opteron/; - if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3) && $arch eq 'x86_64') { push @$cpuFlags , '+kvm_pv_unhalt' if $kvm; push @$cpuFlags , '+kvm_pv_eoi' if $kvm; } - add_hyperv_enlightenments($cpuFlags, $winversion, $machine_type, $kvmver, $conf->{bios}, $gpu_passthrough) if $kvm; + add_hyperv_enlightenments($cpuFlags, $winversion, $machine_type, $kvmver, $conf->{bios}, $gpu_passthrough, $hv_vendor_id) if $kvm; - push @$cpuFlags, 'enforce' if $cpu ne 'host' && $kvm; + push @$cpuFlags, 'enforce' if $cpu ne 'host' && $kvm && $arch eq 'x86_64'; push @$cpuFlags, 'kvm=off' if $kvm_off; @@ -3418,8 +3515,7 @@ sub config_to_command { push @$cmd, '-mon', "chardev=qmp,mode=control"; if (qemu_machine_feature_enabled($machine_type, $kvmver, 2, 12)) { - my $eventsocket = qmp_socket($vmid, 0, 'event'); - push @$cmd, '-chardev', "socket,id=qmp-event,path=$eventsocket,server,nowait"; + push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5"; push @$cmd, '-mon', "chardev=qmp-event,mode=control"; } @@ -3469,7 +3565,7 @@ sub config_to_command { # add usb controllers - my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers($conf, $bridges, $q35, $usbdesc->{format}, $MAX_USB_DEVICES); + my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers($conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES); push @$devices, @usbcontrollers if @usbcontrollers; my $vga = parse_vga($conf->{vga}); @@ -3477,7 +3573,9 @@ sub config_to_command { $vga->{type} = 'qxl' if $qxlnum; if (!$vga->{type}) { - if (qemu_machine_feature_enabled($machine_type, $kvmver, 2, 9)) { + if ($arch eq 'aarch64') { + $vga->{type} = 'virtio'; + } elsif (qemu_machine_feature_enabled($machine_type, $kvmver, 2, 9)) { $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus'; } else { $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus'; @@ -3494,7 +3592,11 @@ sub config_to_command { $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card) } - push @$devices, '-device', print_tabletdevice_full($conf) if $tablet; + if ($tablet) { + push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet; + my $kbd = print_keyboarddevice_full($conf, $arch); + push @$devices, '-device', $kbd if defined($kbd); + } my $kvm_off = 0; my $gpu_passthrough; @@ -3505,11 +3607,17 @@ sub config_to_command { next if !$d; my $pcie = $d->{pcie}; - if($pcie){ + if ($pcie) { die "q35 machine model is not enabled" if !$q35; - $pciaddr = print_pcie_addr("hostpci$i"); - }else{ - $pciaddr = print_pci_addr("hostpci$i", $bridges); + # win7 wants to have the pcie devices directly on the pcie bus + # instead of in the root port + if ($winversion == 7) { + $pciaddr = print_pcie_addr("hostpci${i}bus0"); + } else { + $pciaddr = print_pcie_addr("hostpci$i"); + } + } else { + $pciaddr = print_pci_addr("hostpci$i", $bridges, $arch, $machine_type); } my $rombar = defined($d->{rombar}) && !$d->{rombar} ? ',rombar=0' : ''; @@ -3519,7 +3627,7 @@ sub config_to_command { if ($d->{'x-vga'}) { $xvga = ',x-vga=on'; $kvm_off = 1; - $vga->{type} = 'none'; + $vga->{type} = 'none' if !defined($conf->{vga}); $gpu_passthrough = 1; if ($conf->{bios} && $conf->{bios} eq 'ovmf') { @@ -3528,6 +3636,14 @@ sub config_to_command { } my $pcidevices = $d->{pciid}; my $multifunction = 1 if @$pcidevices > 1; + my $sysfspath; + if ($d->{mdev} && scalar(@$pcidevices) == 1) { + my $id = $pcidevices->[0]->{id}; + my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i); + $sysfspath = "/sys/bus/pci/devices/0000:$id/$uuid"; + } elsif ($d->{mdev}) { + warn "ignoring mediated device with multifunction device\n"; + } my $j=0; foreach my $pcidevice (@$pcidevices) { @@ -3536,7 +3652,13 @@ sub config_to_command { $id .= ".$j" if $multifunction; my $addr = $pciaddr; $addr .= ".$j" if $multifunction; - my $devicestr = "vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr"; + my $devicestr = "vfio-pci"; + if ($sysfspath) { + $devicestr .= ",sysfsdev=$sysfspath"; + } else { + $devicestr .= ",host=$pcidevice->{id}"; + } + $devicestr .= ",id=$id$addr"; if($j == 0){ $devicestr .= "$rombar$xvga"; @@ -3558,7 +3680,14 @@ sub config_to_command { if ($path eq 'socket') { my $socket = "/var/run/qemu-server/${vmid}.serial$i"; push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server,nowait"; - push @$devices, '-device', "isa-serial,chardev=serial$i"; + # On aarch64, serial0 is the UART device. Qemu only allows + # connecting UART devices via the '-serial' command line, as + # the device has a fixed slot on the hardware... + if ($arch eq 'aarch64' && $i == 0) { + push @$devices, '-serial', "chardev:serial$i"; + } else { + push @$devices, '-device', "isa-serial,chardev=serial$i"; + } } else { die "no such serial device\n" if ! -c $path; push @$devices, '-chardev', "tty,id=serial$i,path=$path"; @@ -3623,7 +3752,7 @@ sub config_to_command { push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0; if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){ - push @$devices, '-device', print_vga_device($conf, $vga, undef, $qxlnum, $bridges); + push @$devices, '-device', print_vga_device($conf, $vga, $arch, $machine_type, undef, $qxlnum, $bridges); my $socket = vnc_socket($vmid); push @$cmd, '-vnc', "unix:$socket,x509,password"; } else { @@ -3681,7 +3810,7 @@ sub config_to_command { if (parse_guest_agent($conf)->{enabled}) { my $qgasocket = qmp_socket($vmid, 1); - my $pciaddr = print_pci_addr("qga0", $bridges); + my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type); push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0"; push @$devices, '-device', "virtio-serial,id=qga0$pciaddr"; push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0'; @@ -3693,7 +3822,7 @@ sub config_to_command { if ($qxlnum > 1) { if ($winversion){ for(my $i = 1; $i < $qxlnum; $i++){ - push @$devices, '-device', print_vga_device($conf, $vga, $i, $qxlnum, $bridges); + push @$devices, '-device', print_vga_device($conf, $vga, $arch, $machine_type, $i, $qxlnum, $bridges); } } else { # assume other OS works like Linux @@ -3707,7 +3836,7 @@ sub config_to_command { } } - my $pciaddr = print_pci_addr("spice", $bridges); + my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type); my $nodename = PVE::INotify::nodename(); my $pfamily = PVE::Tools::get_host_address_family($nodename); @@ -3725,13 +3854,13 @@ sub config_to_command { # enable balloon by default, unless explicitly disabled if (!defined($conf->{balloon}) || $conf->{balloon}) { - $pciaddr = print_pci_addr("balloon0", $bridges); + $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type); push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr"; } if ($conf->{watchdog}) { my $wdopts = parse_watchdog($conf->{watchdog}); - $pciaddr = print_pci_addr("watchdog", $bridges); + $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type); my $watchdog = $wdopts->{model} || 'i6300esb'; push @$devices, '-device', "$watchdog$pciaddr"; push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action}; @@ -3779,7 +3908,7 @@ sub config_to_command { my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); - $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges); + $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type); my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw; my $iothread = ''; @@ -3801,14 +3930,14 @@ sub config_to_command { if ($drive->{interface} eq 'sata') { my $controller = int($drive->{index} / $MAX_SATA_DISKS); - $pciaddr = print_pci_addr("ahci$controller", $bridges); + $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type); push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr" if !$ahcicontroller->{$controller}; $ahcicontroller->{$controller}=1; } my $drive_cmd = print_drive_full($storecfg, $vmid, $drive); push @$devices, '-drive',$drive_cmd; - push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges); + push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type); }); for (my $i = 0; $i < $MAX_NETS; $i++) { @@ -3823,13 +3952,30 @@ sub config_to_command { $bootindex_hash->{n} += 1; } - my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i"); + my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, "net$i"); push @$devices, '-netdev', $netdevfull; - my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files); + my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files, $arch, $machine_type); push @$devices, '-device', $netdevicefull; } + if ($conf->{ivshmem}) { + my $ivshmem = PVE::JSONSchema::parse_property_string($ivshmem_fmt, $conf->{ivshmem}); + + my $bus; + if ($q35) { + $bus = print_pcie_addr("ivshmem"); + } else { + $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type); + } + + my $ivshmem_name = $ivshmem->{name} // $vmid; + my $path = '/dev/shm/pve-shm-' . $ivshmem_name; + + push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,"; + push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path,size=$ivshmem->{size}M"; + } + if (!$q35) { # add pci bridges if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { @@ -3840,17 +3986,11 @@ sub config_to_command { $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/; while (my ($k, $v) = each %$bridges) { - $pciaddr = print_pci_addr("pci.$k"); + $pciaddr = print_pci_addr("pci.$k", undef, $arch, $machine_type); unshift @$devices, '-device', "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" if $k > 0; } } - # add custom args - if ($conf->{args}) { - my $aa = PVE::Tools::split_args($conf->{args}); - push @$cmd, @$aa; - } - push @$cmd, @$devices; push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); @@ -3859,6 +3999,18 @@ sub config_to_command { push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags); + if (my $vmstate = $conf->{vmstate}) { + my $statepath = PVE::Storage::path($storecfg, $vmstate); + PVE::Storage::activate_volumes($storecfg, [$vmstate]); + push @$cmd, '-loadstate', $statepath; + } + + # add custom args + if ($conf->{args}) { + my $aa = PVE::Tools::split_args($conf->{args}); + push @$cmd, @$aa; + } + return wantarray ? ($cmd, $vollist, $spice_port) : $cmd; } @@ -3938,18 +4090,22 @@ sub vm_devices_list { } sub vm_deviceplug { - my ($storecfg, $conf, $vmid, $deviceid, $device) = @_; + my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_; my $q35 = machine_type_is_q35($conf); my $devices_list = vm_devices_list($vmid); return 1 if defined($devices_list->{$deviceid}); - qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid); # add PCI bridge if we need it for the device + qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type); # add PCI bridge if we need it for the device if ($deviceid eq 'tablet') { - qemu_deviceadd($vmid, print_tabletdevice_full($conf)); + qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch)); + + } elsif ($deviceid eq 'keyboard') { + + qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch)); } elsif ($deviceid =~ m/^usb(\d+)$/) { @@ -3964,7 +4120,7 @@ sub vm_deviceplug { qemu_iothread_add($vmid, $deviceid, $device); qemu_driveadd($storecfg, $vmid, $device); - my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); + my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, $arch, $machine_type); qemu_deviceadd($vmid, $devicefull); eval { qemu_deviceaddverify($vmid, $deviceid); }; @@ -3978,7 +4134,7 @@ sub vm_deviceplug { my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi"; - my $pciaddr = print_pci_addr($deviceid); + my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type); my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw; my $devicefull = "$scsihw_type,id=$deviceid$pciaddr"; @@ -3997,10 +4153,10 @@ sub vm_deviceplug { } elsif ($deviceid =~ m/^(scsi)(\d+)$/) { - qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device); + qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type); qemu_driveadd($storecfg, $vmid, $device); - my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); + my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, $arch, $machine_type); eval { qemu_deviceadd($vmid, $devicefull); }; if (my $err = $@) { eval { qemu_drivedel($vmid, $deviceid); }; @@ -4010,25 +4166,28 @@ sub vm_deviceplug { } elsif ($deviceid =~ m/^(net)(\d+)$/) { - return undef if !qemu_netdevadd($vmid, $conf, $device, $deviceid); + return undef if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid); - my $machine_type = PVE::QemuServer::qemu_machine_pxe($vmid, $conf); - my $use_old_bios_files = undef; - ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type); + my $machine_type = PVE::QemuServer::qemu_machine_pxe($vmid, $conf); + my $use_old_bios_files = undef; + ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type); - my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid, undef, $use_old_bios_files); - qemu_deviceadd($vmid, $netdevicefull); - eval { qemu_deviceaddverify($vmid, $deviceid); }; + my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type); + qemu_deviceadd($vmid, $netdevicefull); + eval { + qemu_deviceaddverify($vmid, $deviceid); + qemu_set_link_status($vmid, $deviceid, !$device->{link_down}); + }; if (my $err = $@) { eval { qemu_netdevdel($vmid, $deviceid); }; warn $@ if $@; die $err; - } + } } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) { my $bridgeid = $2; - my $pciaddr = print_pci_addr($deviceid); + my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type); my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr"; qemu_deviceadd($vmid, $devicefull); @@ -4050,7 +4209,7 @@ sub vm_deviceunplug { die "can't unplug bootdisk" if $conf->{bootdisk} && $conf->{bootdisk} eq $deviceid; - if ($deviceid eq 'tablet') { + if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') { qemu_devicedel($vmid, $deviceid); @@ -4122,7 +4281,11 @@ sub qemu_iothread_add { sub qemu_iothread_del { my($conf, $vmid, $deviceid) = @_; - my $device = parse_drive($deviceid, $conf->{$deviceid}); + my $confid = $deviceid; + if ($deviceid =~ m/^(?:virtioscsi|scsihw)(\d+)$/) { + $confid = 'scsi' . $1; + } + my $device = parse_drive($confid, $conf->{$confid}); if ($device->{iothread}) { my $iothreads = vm_iothreads_list($vmid); qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"}; @@ -4201,7 +4364,7 @@ sub qemu_devicedelverify { } sub qemu_findorcreatescsihw { - my ($storecfg, $conf, $vmid, $device) = @_; + my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_; my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device); @@ -4209,7 +4372,7 @@ sub qemu_findorcreatescsihw { my $devices_list = vm_devices_list($vmid); if(!defined($devices_list->{$scsihwid})) { - vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device); + vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type); } return 1; @@ -4245,13 +4408,13 @@ sub qemu_deletescsihw { } sub qemu_add_pci_bridge { - my ($storecfg, $conf, $vmid, $device) = @_; + my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_; my $bridges = {}; my $bridgeid; - print_pci_addr($device, $bridges); + print_pci_addr($device, $bridges, $arch, $machine_type); while (my ($k, $v) = each %$bridges) { $bridgeid = $k; @@ -4262,7 +4425,7 @@ sub qemu_add_pci_bridge { my $devices_list = vm_devices_list($vmid); if (!defined($devices_list->{$bridge})) { - vm_deviceplug($storecfg, $conf, $vmid, $bridge); + vm_deviceplug($storecfg, $conf, $vmid, $bridge, $arch, $machine_type); } return 1; @@ -4276,9 +4439,9 @@ sub qemu_set_link_status { } sub qemu_netdevadd { - my ($vmid, $conf, $device, $deviceid) = @_; + my ($vmid, $conf, $arch, $device, $deviceid) = @_; - my $netdev = print_netdev_full($vmid, $conf, $device, $deviceid, 1); + my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1); my %options = split(/[=,]/, $netdev); vm_mon_cmd($vmid, "netdev_add", %options); @@ -4292,7 +4455,7 @@ sub qemu_netdevdel { } sub qemu_usb_hotplug { - my ($storecfg, $conf, $vmid, $deviceid, $device) = @_; + my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_; return if !$device; @@ -4305,7 +4468,7 @@ sub qemu_usb_hotplug { my $devicelist = vm_devices_list($vmid); if (!$devicelist->{xhci}) { - my $pciaddr = print_pci_addr("xhci"); + my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type); qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr"); } } @@ -4313,7 +4476,7 @@ sub qemu_usb_hotplug { $d->{usb3} = $device->{usb3}; # add the new one - vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $d); + vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $d, $arch, $machine_type); } sub qemu_cpu_hotplug { @@ -4537,6 +4700,7 @@ my $fast_plug_option = { 'description' => 1, 'protection' => 1, 'vmstatestorage' => 1, + 'hookscript' => 1, }; # hotplug changes in [PENDING] @@ -4547,6 +4711,7 @@ sub vmconfig_hotplug_pending { my ($vmid, $conf, $storecfg, $selection, $errors) = @_; my $defaults = load_defaults(); + my ($arch, $machine_type) = get_basic_machine_info($conf, undef); # commit values which do not have any impact on running VM first # Note: those option cannot raise errors, we we do not care about @@ -4582,9 +4747,12 @@ sub vmconfig_hotplug_pending { } elsif ($opt eq 'tablet') { die "skip\n" if !$hotplug_features->{usb}; if ($defaults->{tablet}) { - vm_deviceplug($storecfg, $conf, $vmid, $opt); + vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type); + vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type) + if $arch eq 'aarch64'; } else { - vm_deviceunplug($vmid, $conf, $opt); + vm_deviceunplug($vmid, $conf, 'tablet'); + vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64'; } } elsif ($opt =~ m/^usb\d+/) { die "skip\n"; @@ -4657,9 +4825,12 @@ sub vmconfig_hotplug_pending { } elsif ($opt eq 'tablet') { die "skip\n" if !$hotplug_features->{usb}; if ($value == 1) { - vm_deviceplug($storecfg, $conf, $vmid, $opt); + vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type); + vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type) + if $arch eq 'aarch64'; } elsif ($value == 0) { - vm_deviceunplug($vmid, $conf, $opt); + vm_deviceunplug($vmid, $conf, 'tablet'); + vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64'; } } elsif ($opt =~ m/^usb\d+$/) { die "skip\n"; @@ -4668,7 +4839,7 @@ sub vmconfig_hotplug_pending { die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i; my $d = eval { PVE::JSONSchema::parse_property_string($usbdesc->{format}, $value) }; die "skip\n" if !$d; - qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d); + qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type); } elsif ($opt eq 'vcpus') { die "skip\n" if !$hotplug_features->{cpu}; qemu_cpu_hotplug($vmid, $conf, $value); @@ -4686,7 +4857,7 @@ sub vmconfig_hotplug_pending { } elsif ($opt =~ m/^net(\d+)$/) { # some changes can be done without hotplug vmconfig_update_net($storecfg, $conf, $hotplug_features->{network}, - $vmid, $opt, $value); + $vmid, $opt, $value, $arch, $machine_type); } elsif (is_valid_drivename($opt)) { # some changes can be done without hotplug my $drive = parse_drive($opt, $value); @@ -4694,7 +4865,7 @@ sub vmconfig_hotplug_pending { &$apply_pending_cloudinit($opt, $value); } vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk}, - $vmid, $opt, $value, 1); + $vmid, $opt, $value, 1, $arch, $machine_type); } elsif ($opt =~ m/^memory$/) { #dimms die "skip\n" if !$hotplug_features->{memory}; $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value); @@ -4823,7 +4994,7 @@ my $safe_string_ne = sub { }; sub vmconfig_update_net { - my ($storecfg, $conf, $hotplug, $vmid, $opt, $value) = @_; + my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_; my $newnet = parse_net($value); @@ -4864,14 +5035,14 @@ sub vmconfig_update_net { } if ($hotplug) { - vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet); + vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type); } else { die "skip\n"; } } sub vmconfig_update_disk { - my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $force) = @_; + my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $force, $arch, $machine_type) = @_; # fixme: do we need force? @@ -4972,7 +5143,7 @@ sub vmconfig_update_disk { die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/; # hotplug new disks PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|; - vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive); + vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type); } sub vm_start { @@ -4984,7 +5155,10 @@ sub vm_start { die "you can't start a vm if it's a template\n" if PVE::QemuConfig->is_template($conf); - PVE::QemuConfig->check_lock($conf) if !$skiplock; + my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended'); + + PVE::QemuConfig->check_lock($conf) + if !($skiplock || $is_suspended); die "VM $vmid already running\n" if check_running($vmid, undef, $migratedfrom); @@ -5048,6 +5222,14 @@ sub vm_start { } } + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1); + + if ($is_suspended) { + # enforce machine type on suspended vm to ensure HW compatibility + $forcemachine = $conf->{runningmachine}; + print "Resuming suspended VM\n"; + } + my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine); my $migrate_port = 0; @@ -5108,19 +5290,27 @@ sub vm_start { next if !$d; my $pcidevices = $d->{pciid}; foreach my $pcidevice (@$pcidevices) { - my $pciid = $pcidevice->{id}.".".$pcidevice->{function}; + my $pciid = $pcidevice->{id}; - my $info = pci_device_info("0000:$pciid"); - die "IOMMU not present\n" if !check_iommu_support(); + my $info = PVE::SysFSTools::pci_device_info("0000:$pciid"); + die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support(); die "no pci device info for device '$pciid'\n" if !$info; - die "can't unbind/bind pci group to vfio '$pciid'\n" if !pci_dev_group_bind_to_vfio($pciid); - die "can't reset pci device '$pciid'\n" if $info->{has_fl_reset} and !pci_dev_reset($info); + + if ($d->{mdev}) { + my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i); + PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $d->{mdev}); + } else { + die "can't unbind/bind pci group to vfio '$pciid'\n" + if !PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid); + die "can't reset pci device '$pciid'\n" + if $info->{has_fl_reset} and !PVE::SysFSTools::pci_dev_reset($info); + } } } PVE::Storage::activate_volumes($storecfg, $vollist); - if (!check_running($vmid, 1)) { + if (-d "/sys/fs/cgroup/systemd/qemu.slice/$vmid.scope") { eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub {}, errfunc => sub {}); @@ -5130,7 +5320,7 @@ sub vm_start { my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits} : $defaults->{cpuunits}; - my $start_timeout = $conf->{hugepages} ? 300 : 30; + my $start_timeout = ($conf->{hugepages} || $is_suspended) ? 300 : 30; my %run_params = (timeout => $statefile ? undef : $start_timeout, umask => 0077); my %properties = ( @@ -5237,6 +5427,15 @@ sub vm_start { property => "guest-stats-polling-interval", value => 2) if (!defined($conf->{balloon}) || $conf->{balloon}); + if ($is_suspended && (my $vmstate = $conf->{vmstate})) { + print "Resumed VM, removing state\n"; + delete $conf->@{qw(lock vmstate runningmachine)}; + PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); + PVE::Storage::vdisk_free($storecfg, $vmstate); + PVE::QemuConfig->write_config($vmid, $conf); + } + + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start'); }); } @@ -5298,10 +5497,19 @@ sub vm_human_monitor_command { } sub vm_commandline { - my ($storecfg, $vmid) = @_; + my ($storecfg, $vmid, $snapname) = @_; my $conf = PVE::QemuConfig->load_config($vmid); + if ($snapname) { + my $snapshot = $conf->{snapshots}->{$snapname}; + die "snapshot '$snapname' does not exist\n" if !defined($snapshot); + + $snapshot->{digest} = $conf->{digest}; # keep file digest for API + + $conf = $snapshot; + } + my $defaults = load_defaults(); my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults); @@ -5354,6 +5562,27 @@ sub vm_stop_cleanup { unlink "/var/run/qemu-server/${vmid}.$ext"; } + if ($conf->{ivshmem}) { + my $ivshmem = PVE::JSONSchema::parse_property_string($ivshmem_fmt, $conf->{ivshmem}); + # just delete it for now, VMs which have this already open do not + # are affected, but new VMs will get a separated one. If this + # becomes an issue we either add some sort of ref-counting or just + # add a "don't delete on stop" flag to the ivshmem format. + unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid); + } + + foreach my $key (keys %$conf) { + next if $key !~ m/^hostpci(\d+)$/; + my $hostpciindex = $1; + my $d = parse_hostpci($conf->{$key}); + my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex); + + foreach my $pci (@{$d->{pciid}}) { + my $pciid = $pci->{id}; + PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid); + } + } + vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes; }; warn $@ if $@; # avoid errors - just warn @@ -5388,6 +5617,7 @@ sub vm_stop { my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup}); $timeout = $opts->{down} if $opts->{down}; } + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop'); } $timeout = 60 if !defined($timeout); @@ -5452,17 +5682,84 @@ sub vm_stop { } sub vm_suspend { - my ($vmid, $skiplock) = @_; + my ($vmid, $skiplock, $includestate, $statestorage) = @_; + + my $conf; + my $path; + my $storecfg; + my $vmstate; PVE::QemuConfig->lock_config($vmid, sub { - my $conf = PVE::QemuConfig->load_config($vmid); + $conf = PVE::QemuConfig->load_config($vmid); + my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup'); PVE::QemuConfig->check_lock($conf) - if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup')); + if !($skiplock || $is_backing_up); - vm_mon_cmd($vmid, "stop"); + die "cannot suspend to disk during backup\n" + if $is_backing_up && $includestate; + + if ($includestate) { + $conf->{lock} = 'suspending'; + my $date = strftime("%Y-%m-%d", localtime(time())); + $storecfg = PVE::Storage::config(); + $vmstate = PVE::QemuConfig->__snapshot_save_vmstate($vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1); + $path = PVE::Storage::path($storecfg, $vmstate); + PVE::QemuConfig->write_config($vmid, $conf); + } else { + vm_mon_cmd($vmid, "stop"); + } }); + + if ($includestate) { + # save vm state + PVE::Storage::activate_volumes($storecfg, [$vmstate]); + + eval { + vm_mon_cmd($vmid, "savevm-start", statefile => $path); + for(;;) { + my $state = vm_mon_cmd_nocheck($vmid, "query-savevm"); + if (!$state->{status}) { + die "savevm not active\n"; + } elsif ($state->{status} eq 'active') { + sleep(1); + next; + } elsif ($state->{status} eq 'completed') { + print "State saved, quitting\n"; + last; + } elsif ($state->{status} eq 'failed' && $state->{error}) { + die "query-savevm failed with error '$state->{error}'\n" + } else { + die "query-savevm returned status '$state->{status}'\n"; + } + } + }; + my $err = $@; + + PVE::QemuConfig->lock_config($vmid, sub { + $conf = PVE::QemuConfig->load_config($vmid); + if ($err) { + # cleanup, but leave suspending lock, to indicate something went wrong + eval { + vm_mon_cmd($vmid, "savevm-end"); + PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); + PVE::Storage::vdisk_free($storecfg, $vmstate); + delete $conf->@{qw(vmstate runningmachine)}; + PVE::QemuConfig->write_config($vmid, $conf); + }; + warn $@ if $@; + die $err; + } + + die "lock changed unexpectedly\n" + if !PVE::QemuConfig->has_lock($conf, 'suspending'); + + vm_qmp_command($vmid, { execute => "quit" }); + $conf->{lock} = 'suspended'; + PVE::QemuConfig->write_config($vmid, $conf); + }); + } } sub vm_resume { @@ -5519,123 +5816,6 @@ sub vm_destroy { }); } -# pci helpers - -sub file_write { - my ($filename, $buf) = @_; - - my $fh = IO::File->new($filename, "w"); - return undef if !$fh; - - my $res = print $fh $buf; - - $fh->close(); - - return $res; -} - -sub pci_device_info { - my ($name) = @_; - - my $res; - - return undef if $name !~ m/^([a-f0-9]{4}):([a-f0-9]{2}):([a-f0-9]{2})\.([a-f0-9])$/; - my ($domain, $bus, $slot, $func) = ($1, $2, $3, $4); - - my $irq = file_read_firstline("$pcisysfs/devices/$name/irq"); - return undef if !defined($irq) || $irq !~ m/^\d+$/; - - my $vendor = file_read_firstline("$pcisysfs/devices/$name/vendor"); - return undef if !defined($vendor) || $vendor !~ s/^0x//; - - my $product = file_read_firstline("$pcisysfs/devices/$name/device"); - return undef if !defined($product) || $product !~ s/^0x//; - - $res = { - name => $name, - vendor => $vendor, - product => $product, - domain => $domain, - bus => $bus, - slot => $slot, - func => $func, - irq => $irq, - has_fl_reset => -f "$pcisysfs/devices/$name/reset" || 0, - }; - - return $res; -} - -sub pci_dev_reset { - my ($dev) = @_; - - my $name = $dev->{name}; - - my $fn = "$pcisysfs/devices/$name/reset"; - - return file_write($fn, "1"); -} - -sub pci_dev_bind_to_vfio { - my ($dev) = @_; - - my $name = $dev->{name}; - - my $vfio_basedir = "$pcisysfs/drivers/vfio-pci"; - - if (!-d $vfio_basedir) { - system("/sbin/modprobe vfio-pci >/dev/null 2>/dev/null"); - } - die "Cannot find vfio-pci module!\n" if !-d $vfio_basedir; - - my $testdir = "$vfio_basedir/$name"; - return 1 if -d $testdir; - - my $data = "$dev->{vendor} $dev->{product}"; - return undef if !file_write("$vfio_basedir/new_id", $data); - - my $fn = "$pcisysfs/devices/$name/driver/unbind"; - if (!file_write($fn, $name)) { - return undef if -f $fn; - } - - $fn = "$vfio_basedir/bind"; - if (! -d $testdir) { - return undef if !file_write($fn, $name); - } - - return -d $testdir; -} - -sub pci_dev_group_bind_to_vfio { - my ($pciid) = @_; - - my $vfio_basedir = "$pcisysfs/drivers/vfio-pci"; - - if (!-d $vfio_basedir) { - system("/sbin/modprobe vfio-pci >/dev/null 2>/dev/null"); - } - die "Cannot find vfio-pci module!\n" if !-d $vfio_basedir; - - # get IOMMU group devices - opendir(my $D, "$pcisysfs/devices/0000:$pciid/iommu_group/devices/") || die "Cannot open iommu_group: $!\n"; - my @devs = grep /^0000:/, readdir($D); - closedir($D); - - foreach my $pciid (@devs) { - $pciid =~ m/^([:\.\da-f]+)$/ or die "PCI ID $pciid not valid!\n"; - - # pci bridges, switches or root ports are not supported - # they have a pci_bus subdirectory so skip them - next if (-e "$pcisysfs/devices/$pciid/pci_bus"); - - my $info = pci_device_info($1); - pci_dev_bind_to_vfio($info) || die "Cannot bind $pciid to vfio\n"; - } - - return 1; -} - # vzdump restore implementaion sub tar_archive_read_firstfile { @@ -6451,6 +6631,23 @@ sub template_create { }); } +sub convert_iscsi_path { + my ($path) = @_; + + if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) { + my $portal = $1; + my $target = $2; + my $lun = $3; + + my $initiator_name = get_initiator_name(); + + return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,". + "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw"; + } + + die "cannot convert iscsi path '$path', unkown format\n"; +} + sub qemu_img_convert { my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_; @@ -6471,13 +6668,32 @@ sub qemu_img_convert { my $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname); my $dst_path = PVE::Storage::path($storecfg, $dst_volid); + my $src_is_iscsi = ($src_path =~ m|^iscsi://|); + my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|); + my $cmd = []; push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n'; push @$cmd, '-l', "snapshot.name=$snapname" if($snapname && $src_format eq "qcow2"); push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool'; push @$cmd, '-T', 'none' if $src_scfg->{type} eq 'zfspool'; - push @$cmd, '-f', $src_format, '-O', $dst_format, $src_path; - if ($is_zero_initialized) { + + if ($src_is_iscsi) { + push @$cmd, '--image-opts'; + $src_path = convert_iscsi_path($src_path); + } else { + push @$cmd, '-f', $src_format; + } + + if ($dst_is_iscsi) { + push @$cmd, '--target-image-opts'; + $dst_path = convert_iscsi_path($dst_path); + } else { + push @$cmd, '-O', $dst_format; + } + + push @$cmd, $src_path; + + if (!$dst_is_iscsi && $is_zero_initialized) { push @$cmd, "zeroinit:$dst_path"; } else { push @$cmd, $dst_path; @@ -6541,11 +6757,12 @@ sub qemu_drive_mirror { print "drive mirror is starting for drive-$drive\n"; - eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; #if a job already run for this device,it's throw an error - + # if a job already runs for this device we get an error, catch it for cleanup + eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; if (my $err = $@) { eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) }; - die "mirroring error: $err"; + warn "$@\n" if $@; + die "mirroring error: $err\n"; } qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete, $qga); @@ -6703,6 +6920,7 @@ sub clone_disk { my $name = undef; if (drive_is_cloudinit($drive)) { $name = "vm-$newvmid-cloudinit"; + $snapname = undef; # cloudinit only supports raw and qcow2 atm: if ($dst_format eq 'qcow2') { $name .= '.qcow2'; @@ -6839,25 +7057,6 @@ sub create_efidisk($$$$$) { return ($volid, $vars_size); } -sub lspci { - - my $devices = {}; - - dir_glob_foreach("$pcisysfs/devices", '[a-f0-9]{4}:([a-f0-9]{2}:[a-f0-9]{2})\.([0-9])', sub { - my (undef, $id, $function) = @_; - my $res = { id => $id, function => $function}; - push @{$devices->{$id}}, $res; - }); - - # Entries should be sorted by functions. - foreach my $id (keys %$devices) { - my $dev = $devices->{$id}; - $devices->{$id} = [ sort { $a->{function} <=> $b->{function} } @$dev ]; - } - - return $devices; -} - sub vm_iothreads_list { my ($vmid) = @_; @@ -6891,12 +7090,15 @@ sub scsihw_infos { } sub add_hyperv_enlightenments { - my ($cpuFlags, $winversion, $machine_type, $kvmver, $bios, $gpu_passthrough) = @_; + my ($cpuFlags, $winversion, $machine_type, $kvmver, $bios, $gpu_passthrough, $hv_vendor_id) = @_; return if $winversion < 6; return if $bios && $bios eq 'ovmf' && $winversion < 8; - push @$cpuFlags , 'hv_vendor_id=proxmox' if $gpu_passthrough; + if ($gpu_passthrough || defined($hv_vendor_id)) { + $hv_vendor_id //= 'proxmox'; + push @$cpuFlags , "hv_vendor_id=$hv_vendor_id"; + } if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { push @$cpuFlags , 'hv_spinlocks=0x1fff';