X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=fac4dfe7bfecf6bb3177660265fc116f5d812b9a;hb=90041ba890f5626f114ae227b9bdeb24f81f1224;hp=38f3a058499efb89969a155a7451c9e59ae37cbb;hpb=6ab45bd7ff8418abe00861df74ff85dfe244a108;p=qemu-server.git diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 38f3a05..fac4dfe 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -2,6 +2,7 @@ package PVE::QemuServer; use strict; use warnings; + use POSIX; use IO::Handle; use IO::Select; @@ -30,6 +31,7 @@ use PVE::ProcFSTools; use PVE::QemuConfig; use PVE::QMPClient; use PVE::RPCEnvironment; +use PVE::GuestHelpers; use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr); use PVE::QemuServer::Memory; use PVE::QemuServer::USB qw(parse_usb_device); @@ -52,7 +54,7 @@ my $OVMF = { ], }; -my $qemu_snap_storage = {rbd => 1, sheepdog => 1}; +my $qemu_snap_storage = { rbd => 1 }; my $cpuinfo = PVE::ProcFSTools::read_cpuinfo(); @@ -76,12 +78,6 @@ PVE::JSONSchema::register_standard_option('pve-qm-stateuri', { optional => 1, }); -PVE::JSONSchema::register_standard_option('pve-snapshot-name', { - description => "The name of the snapshot.", - type => 'string', format => 'pve-configid', - maxLength => 40, -}); - PVE::JSONSchema::register_standard_option('pve-qm-image-format', { type => 'string', enum => [qw(raw cow qcow qed qcow2 vmdk cloop)], @@ -186,6 +182,13 @@ my $cpu_fmt = { optional => 1, default => 0 }, + 'hv-vendor-id' => { + type => 'string', + pattern => qr/[a-zA-Z0-9]{1,12}/, + format_description => 'vendor-id', + description => 'The Hyper-V vendor ID. Some drivers or programs inside Windows guests need a specific ID.', + optional => 1, + }, flags => { description => "List of additional CPU flags separated by ';'." . " Use '+FLAG' to enable, '-FLAG' to disable a flag." @@ -237,7 +240,7 @@ my $vga_fmt = { default => 'std', optional => 1, default_key => 1, - enum => [qw(cirrus qxl qxl2 qxl3 qxl4 serial0 serial1 serial2 serial3 std virtio vmware)], + enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)], }, memory => { description => "Sets the VGA memory (in MiB). Has no effect with serial display.", @@ -248,6 +251,21 @@ my $vga_fmt = { }, }; +my $ivshmem_fmt = { + size => { + type => 'integer', + minimum => 1, + description => "The size of the file in MB.", + }, + name => { + type => 'string', + pattern => '[a-zA-Z0-9\-]+', + optional => 1, + format_description => 'string', + description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.", + }, +}; + my $confdesc = { onboot => { optional => 1, @@ -277,7 +295,7 @@ my $confdesc = { optional => 1, type => 'string', description => "Lock/unlock the VM.", - enum => [qw(migrate backup snapshot rollback)], + enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)], }, cpulimit => { optional => 1, @@ -607,8 +625,45 @@ EODESCR default => "1 (autogenerated)", optional => 1, }, + hookscript => { + type => 'string', + format => 'pve-volume-id', + optional => 1, + description => "Script that will be executed during various steps in the vms lifetime.", + }, + ivshmem => { + type => 'string', + format => $ivshmem_fmt, + description => "Inter-VM shared memory. Useful for direct communication between VMs, or to the host.", + optional => 1, + } }; +my $cicustom_fmt = { + meta => { + type => 'string', + optional => 1, + description => 'Specify a custom file containing all meta data passed to the VM via cloud-init. This is provider specific meaning configdrive2 and nocloud differ.', + format => 'pve-volume-id', + format_description => 'volume', + }, + network => { + type => 'string', + optional => 1, + description => 'Specify a custom file containing all network data passed to the VM via cloud-init.', + format => 'pve-volume-id', + format_description => 'volume', + }, + user => { + type => 'string', + optional => 1, + description => 'Specify a custom file containing all user data passed to the VM via cloud-init.', + format => 'pve-volume-id', + format_description => 'volume', + }, +}; +PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt); + my $confdesc_cloudinit = { citype => { optional => 1, @@ -626,6 +681,12 @@ my $confdesc_cloudinit = { type => 'string', description => 'cloud-init: Password to assign the user. Using this is generally not recommended. Use ssh keys instead. Also note that older cloud-init versions do not support hashed passwords.', }, + cicustom => { + optional => 1, + type => 'string', + description => 'cloud-init: Specify custom files to replace the automatically generated ones at start.', + format => 'pve-qm-cicustom', + }, searchdomain => { optional => 1, type => 'string', @@ -736,13 +797,9 @@ The DHCP server assign addresses to the guest starting from 10.0.2.15. __EOD__ my $net_fmt = { - macaddr => { - type => 'string', - pattern => qr/[0-9a-f]{2}(?::[0-9a-f]{2}){5}/i, + macaddr => get_standard_option('mac-addr', { description => "MAC address. That address must be unique withing your network. This is automatically generated if not specified.", - format_description => "XX:XX:XX:XX:XX:XX", - optional => 1, - }, + }), model => { type => 'string', description => "Network Card Model. The 'virtio' model provides the best performance with very low CPU overhead. If your guest does not support this driver, it is usually best to use 'e1000'.", @@ -1038,6 +1095,16 @@ my %ssd_fmt = ( }, ); +my %wwn_fmt = ( + wwn => { + type => 'string', + pattern => qr/^(0x)[0-9a-fA-F]{16}/, + format_description => 'wwn', + description => "The drive's worldwide name, encoded as 16 bytes hex string, prefixed by '0x'.", + optional => 1, + }, +); + my $add_throttle_desc = sub { my ($key, $type, $what, $unit, $longunit, $minimum) = @_; my $d = { @@ -1086,6 +1153,7 @@ my $ide_fmt = { %drivedesc_base, %model_fmt, %ssd_fmt, + %wwn_fmt, }; PVE::JSONSchema::register_format("pve-qm-ide", $ide_fmt); @@ -1102,6 +1170,7 @@ my $scsi_fmt = { %queues_fmt, %scsiblock_fmt, %ssd_fmt, + %wwn_fmt, }; my $scsidesc = { optional => 1, @@ -1113,6 +1182,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); my $sata_fmt = { %drivedesc_base, %ssd_fmt, + %wwn_fmt, }; my $satadesc = { optional => 1, @@ -1139,6 +1209,7 @@ my $alldrive_fmt = { %queues_fmt, %scsiblock_fmt, %ssd_fmt, + %wwn_fmt, }; my $efidisk_fmt = { @@ -1202,8 +1273,7 @@ my $usbdesc = { }; PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc); -# NOTE: the match-groups of this regex are used in parse_hostpci -my $PCIRE = qr/([a-f0-9]{2}:[a-f0-9]{2})(?:\.([a-f0-9]))?/; +my $PCIRE = qr/[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/; my $hostpci_fmt = { host => { default_key => 1, @@ -1211,7 +1281,7 @@ my $hostpci_fmt = { pattern => qr/$PCIRE(;$PCIRE)*/, format_description => 'HOSTPCIID[;HOSTPCIID2...]', description => < <{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) { $device .= ",rotation_rate=1"; } + $device .= ",wwn=$drive->{wwn}" if $drive->{wwn}; } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') { my $maxdev = ($drive->{interface} eq 'sata') ? $MAX_SATA_DISKS : 2; @@ -1793,6 +1866,7 @@ sub print_drivedevice_full { $device .= ",rotation_rate=1"; } } + $device .= ",wwn=$drive->{wwn}" if $drive->{wwn}; } elsif ($drive->{interface} eq 'usb') { die "implement me"; # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 @@ -1831,7 +1905,7 @@ sub print_drive_full { my $path; my $volid = $drive->{file}; my $format; - + if (drive_is_cdrom($drive)) { $path = get_iso_path($storecfg, $vmid, $volid); } else { @@ -1974,7 +2048,7 @@ sub print_netdev_full { my $vhostparam = ''; if (is_native($arch)) { - $vhostparam = ',vhost=on' if $kernel_has_vhost_net && $net->{model} eq 'virtio'; + $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio'; } my $vmname = $conf->{name} || "vm$vmid"; @@ -2024,7 +2098,7 @@ sub print_vga_device { my ($conf, $vga, $arch, $machine, $id, $qxlnum, $bridges) = @_; my $type = $vga_map->{$vga->{type}}; - if ($type eq 'virtio-vga' && $arch eq 'aarch64') { + if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') { $type = 'virtio-gpu'; } my $vgamem_mb = $vga->{memory}; @@ -2112,16 +2186,12 @@ sub parse_hostpci { my @idlist = split(/;/, $res->{host}); delete $res->{host}; foreach my $id (@idlist) { - if ($id =~ /^$PCIRE$/) { - if (defined($2)) { - push @{$res->{pciid}}, { id => $1, function => $2 }; - } else { - my $pcidevices = PVE::SysFSTools::lspci($1); - $res->{pciid} = $pcidevices->{$1}; - } - } else { - # should have been caught by parse_property_string already - die "failed to parse PCI id: $id\n"; + if ($id =~ m/\./) { # full id 00:00.1 + push @{$res->{pciid}}, { + id => $id, + }; + } else { # partial id 00:00 + $res->{pciid} = PVE::SysFSTools::lspci($id); } } return $res; @@ -2776,21 +2846,23 @@ sub config_list { sub check_local_resources { my ($conf, $noerr) = @_; - my $loc_res = 0; + my @loc_res = (); + + push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax + push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax - $loc_res = 1 if $conf->{hostusb}; # old syntax - $loc_res = 1 if $conf->{hostpci}; # old syntax + push @loc_res, "ivshmem" if $conf->{ivshmem}; foreach my $k (keys %$conf) { next if $k =~ m/^usb/ && ($conf->{$k} eq 'spice'); # sockets are safe: they will recreated be on the target side post-migrate next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket'); - $loc_res = 1 if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/; + push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/; } - die "VM uses local resources\n" if $loc_res && !$noerr; + die "VM uses local resources\n" if scalar @loc_res && !$noerr; - return $loc_res; + return \@loc_res; } # check if used storages are available on all nodes (use by migrate) @@ -2988,6 +3060,11 @@ our $vmstatus_return_properties = { type => 'number', optional => 1, }, + lock => { + description => "The current config lock, if any.", + type => 'string', + optional => 1, + } }; my $last_proc_pid_stat; @@ -3058,6 +3135,7 @@ sub vmstatus { $d->{template} = PVE::QemuConfig->is_template($conf); $d->{serial} = 1 if conf_has_serial($conf); + $d->{lock} = $conf->{lock} if $conf->{lock}; $res->{$vmid} = $d; } @@ -3334,11 +3412,13 @@ sub get_cpu_options { if ($arch eq 'aarch64') { $cpu = 'cortex-a57'; } + my $hv_vendor_id; if (my $cputype = $conf->{cpu}) { my $cpuconf = PVE::JSONSchema::parse_property_string($cpu_fmt, $cputype) or die "Cannot parse cpu description: $cputype\n"; $cpu = $cpuconf->{cputype}; $kvm_off = 1 if $cpuconf->{hidden}; + $hv_vendor_id = $cpuconf->{'hv-vendor-id'}; if (defined(my $flags = $cpuconf->{flags})) { push @$cpuFlags, split(";", $flags); @@ -3360,7 +3440,7 @@ sub get_cpu_options { push @$cpuFlags , '+kvm_pv_eoi' if $kvm; } - add_hyperv_enlightenments($cpuFlags, $winversion, $machine_type, $kvmver, $conf->{bios}, $gpu_passthrough) if $kvm; + add_hyperv_enlightenments($cpuFlags, $winversion, $machine_type, $kvmver, $conf->{bios}, $gpu_passthrough, $hv_vendor_id) if $kvm; push @$cpuFlags, 'enforce' if $cpu ne 'host' && $kvm && $arch eq 'x86_64'; @@ -3527,10 +3607,16 @@ sub config_to_command { next if !$d; my $pcie = $d->{pcie}; - if($pcie){ + if ($pcie) { die "q35 machine model is not enabled" if !$q35; - $pciaddr = print_pcie_addr("hostpci$i"); - }else{ + # win7 wants to have the pcie devices directly on the pcie bus + # instead of in the root port + if ($winversion == 7) { + $pciaddr = print_pcie_addr("hostpci${i}bus0"); + } else { + $pciaddr = print_pcie_addr("hostpci$i"); + } + } else { $pciaddr = print_pci_addr("hostpci$i", $bridges, $arch, $machine_type); } @@ -3541,7 +3627,7 @@ sub config_to_command { if ($d->{'x-vga'}) { $xvga = ',x-vga=on'; $kvm_off = 1; - $vga->{type} = 'none'; + $vga->{type} = 'none' if !defined($conf->{vga}); $gpu_passthrough = 1; if ($conf->{bios} && $conf->{bios} eq 'ovmf') { @@ -3553,9 +3639,8 @@ sub config_to_command { my $sysfspath; if ($d->{mdev} && scalar(@$pcidevices) == 1) { my $id = $pcidevices->[0]->{id}; - my $function = $pcidevices->[0]->{function}; my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i); - $sysfspath = "/sys/bus/pci/devices/0000:$id.$function/$uuid"; + $sysfspath = "/sys/bus/pci/devices/0000:$id/$uuid"; } elsif ($d->{mdev}) { warn "ignoring mediated device with multifunction device\n"; } @@ -3571,7 +3656,7 @@ sub config_to_command { if ($sysfspath) { $devicestr .= ",sysfsdev=$sysfspath"; } else { - $devicestr .= ",host=$pcidevice->{id}.$pcidevice->{function}"; + $devicestr .= ",host=$pcidevice->{id}"; } $devicestr .= ",id=$id$addr"; @@ -3713,7 +3798,7 @@ sub config_to_command { push @$cmd, get_cpu_options($conf, $arch, $kvm, $machine_type, $kvm_off, $kvmver, $winversion, $gpu_passthrough); PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd); - + push @$cmd, '-S' if $conf->{freeze}; push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard}); @@ -3837,7 +3922,7 @@ sub config_to_command { my $queues = ''; if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){ $queues = ",num_queues=$drive->{queues}"; - } + } push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues" if !$scsicontroller->{$controller}; $scsicontroller->{$controller}=1; @@ -3874,6 +3959,23 @@ sub config_to_command { push @$devices, '-device', $netdevicefull; } + if ($conf->{ivshmem}) { + my $ivshmem = PVE::JSONSchema::parse_property_string($ivshmem_fmt, $conf->{ivshmem}); + + my $bus; + if ($q35) { + $bus = print_pcie_addr("ivshmem"); + } else { + $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type); + } + + my $ivshmem_name = $ivshmem->{name} // $vmid; + my $path = '/dev/shm/pve-shm-' . $ivshmem_name; + + push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,"; + push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path,size=$ivshmem->{size}M"; + } + if (!$q35) { # add pci bridges if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { @@ -3889,12 +3991,6 @@ sub config_to_command { } } - # add custom args - if ($conf->{args}) { - my $aa = PVE::Tools::split_args($conf->{args}); - push @$cmd, @$aa; - } - push @$cmd, @$devices; push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); @@ -3903,6 +3999,18 @@ sub config_to_command { push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags); + if (my $vmstate = $conf->{vmstate}) { + my $statepath = PVE::Storage::path($storecfg, $vmstate); + PVE::Storage::activate_volumes($storecfg, [$vmstate]); + push @$cmd, '-loadstate', $statepath; + } + + # add custom args + if ($conf->{args}) { + my $aa = PVE::Tools::split_args($conf->{args}); + push @$cmd, @$aa; + } + return wantarray ? ($cmd, $vollist, $spice_port) : $cmd; } @@ -4058,20 +4166,23 @@ sub vm_deviceplug { } elsif ($deviceid =~ m/^(net)(\d+)$/) { - return undef if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid); + return undef if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid); - my $machine_type = PVE::QemuServer::qemu_machine_pxe($vmid, $conf); - my $use_old_bios_files = undef; - ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type); + my $machine_type = PVE::QemuServer::qemu_machine_pxe($vmid, $conf); + my $use_old_bios_files = undef; + ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type); - my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type); - qemu_deviceadd($vmid, $netdevicefull); - eval { qemu_deviceaddverify($vmid, $deviceid); }; + my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type); + qemu_deviceadd($vmid, $netdevicefull); + eval { + qemu_deviceaddverify($vmid, $deviceid); + qemu_set_link_status($vmid, $deviceid, !$device->{link_down}); + }; if (my $err = $@) { eval { qemu_netdevdel($vmid, $deviceid); }; warn $@ if $@; die $err; - } + } } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) { @@ -4170,7 +4281,11 @@ sub qemu_iothread_add { sub qemu_iothread_del { my($conf, $vmid, $deviceid) = @_; - my $device = parse_drive($deviceid, $conf->{$deviceid}); + my $confid = $deviceid; + if ($deviceid =~ m/^(?:virtioscsi|scsihw)(\d+)$/) { + $confid = 'scsi' . $1; + } + my $device = parse_drive($confid, $conf->{$confid}); if ($device->{iothread}) { my $iothreads = vm_iothreads_list($vmid); qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"}; @@ -4585,6 +4700,7 @@ my $fast_plug_option = { 'description' => 1, 'protection' => 1, 'vmstatestorage' => 1, + 'hookscript' => 1, }; # hotplug changes in [PENDING] @@ -5039,7 +5155,10 @@ sub vm_start { die "you can't start a vm if it's a template\n" if PVE::QemuConfig->is_template($conf); - PVE::QemuConfig->check_lock($conf) if !$skiplock; + my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended'); + + PVE::QemuConfig->check_lock($conf) + if !($skiplock || $is_suspended); die "VM $vmid already running\n" if check_running($vmid, undef, $migratedfrom); @@ -5103,6 +5222,14 @@ sub vm_start { } } + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1); + + if ($is_suspended) { + # enforce machine type on suspended vm to ensure HW compatibility + $forcemachine = $conf->{runningmachine}; + print "Resuming suspended VM\n"; + } + my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine); my $migrate_port = 0; @@ -5163,7 +5290,7 @@ sub vm_start { next if !$d; my $pcidevices = $d->{pciid}; foreach my $pcidevice (@$pcidevices) { - my $pciid = $pcidevice->{id}.".".$pcidevice->{function}; + my $pciid = $pcidevice->{id}; my $info = PVE::SysFSTools::pci_device_info("0000:$pciid"); die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support(); @@ -5183,7 +5310,7 @@ sub vm_start { PVE::Storage::activate_volumes($storecfg, $vollist); - if (!check_running($vmid, 1)) { + if (-d "/sys/fs/cgroup/systemd/qemu.slice/$vmid.scope") { eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub {}, errfunc => sub {}); @@ -5193,7 +5320,7 @@ sub vm_start { my $cpuunits = defined($conf->{cpuunits}) ? $conf->{cpuunits} : $defaults->{cpuunits}; - my $start_timeout = $conf->{hugepages} ? 300 : 30; + my $start_timeout = ($conf->{hugepages} || $is_suspended) ? 300 : 30; my %run_params = (timeout => $statefile ? undef : $start_timeout, umask => 0077); my %properties = ( @@ -5300,6 +5427,15 @@ sub vm_start { property => "guest-stats-polling-interval", value => 2) if (!defined($conf->{balloon}) || $conf->{balloon}); + if ($is_suspended && (my $vmstate = $conf->{vmstate})) { + print "Resumed VM, removing state\n"; + delete $conf->@{qw(lock vmstate runningmachine)}; + PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); + PVE::Storage::vdisk_free($storecfg, $vmstate); + PVE::QemuConfig->write_config($vmid, $conf); + } + + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start'); }); } @@ -5361,10 +5497,19 @@ sub vm_human_monitor_command { } sub vm_commandline { - my ($storecfg, $vmid) = @_; + my ($storecfg, $vmid, $snapname) = @_; my $conf = PVE::QemuConfig->load_config($vmid); + if ($snapname) { + my $snapshot = $conf->{snapshots}->{$snapname}; + die "snapshot '$snapname' does not exist\n" if !defined($snapshot); + + $snapshot->{digest} = $conf->{digest}; # keep file digest for API + + $conf = $snapshot; + } + my $defaults = load_defaults(); my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults); @@ -5417,6 +5562,15 @@ sub vm_stop_cleanup { unlink "/var/run/qemu-server/${vmid}.$ext"; } + if ($conf->{ivshmem}) { + my $ivshmem = PVE::JSONSchema::parse_property_string($ivshmem_fmt, $conf->{ivshmem}); + # just delete it for now, VMs which have this already open do not + # are affected, but new VMs will get a separated one. If this + # becomes an issue we either add some sort of ref-counting or just + # add a "don't delete on stop" flag to the ivshmem format. + unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid); + } + foreach my $key (keys %$conf) { next if $key !~ m/^hostpci(\d+)$/; my $hostpciindex = $1; @@ -5424,7 +5578,7 @@ sub vm_stop_cleanup { my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex); foreach my $pci (@{$d->{pciid}}) { - my $pciid = $pci->{id} . "." . $pci->{function}; + my $pciid = $pci->{id}; PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid); } } @@ -5463,6 +5617,7 @@ sub vm_stop { my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup}); $timeout = $opts->{down} if $opts->{down}; } + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop'); } $timeout = 60 if !defined($timeout); @@ -5527,25 +5682,92 @@ sub vm_stop { } sub vm_suspend { - my ($vmid, $skiplock) = @_; + my ($vmid, $skiplock, $includestate, $statestorage) = @_; + + my $conf; + my $path; + my $storecfg; + my $vmstate; PVE::QemuConfig->lock_config($vmid, sub { - my $conf = PVE::QemuConfig->load_config($vmid); + $conf = PVE::QemuConfig->load_config($vmid); + my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup'); PVE::QemuConfig->check_lock($conf) - if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup')); + if !($skiplock || $is_backing_up); + + die "cannot suspend to disk during backup\n" + if $is_backing_up && $includestate; - vm_mon_cmd($vmid, "stop"); + if ($includestate) { + $conf->{lock} = 'suspending'; + my $date = strftime("%Y-%m-%d", localtime(time())); + $storecfg = PVE::Storage::config(); + $vmstate = PVE::QemuConfig->__snapshot_save_vmstate($vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1); + $path = PVE::Storage::path($storecfg, $vmstate); + PVE::QemuConfig->write_config($vmid, $conf); + } else { + vm_mon_cmd($vmid, "stop"); + } }); + + if ($includestate) { + # save vm state + PVE::Storage::activate_volumes($storecfg, [$vmstate]); + + eval { + vm_mon_cmd($vmid, "savevm-start", statefile => $path); + for(;;) { + my $state = vm_mon_cmd_nocheck($vmid, "query-savevm"); + if (!$state->{status}) { + die "savevm not active\n"; + } elsif ($state->{status} eq 'active') { + sleep(1); + next; + } elsif ($state->{status} eq 'completed') { + print "State saved, quitting\n"; + last; + } elsif ($state->{status} eq 'failed' && $state->{error}) { + die "query-savevm failed with error '$state->{error}'\n" + } else { + die "query-savevm returned status '$state->{status}'\n"; + } + } + }; + my $err = $@; + + PVE::QemuConfig->lock_config($vmid, sub { + $conf = PVE::QemuConfig->load_config($vmid); + if ($err) { + # cleanup, but leave suspending lock, to indicate something went wrong + eval { + vm_mon_cmd($vmid, "savevm-end"); + PVE::Storage::deactivate_volumes($storecfg, [$vmstate]); + PVE::Storage::vdisk_free($storecfg, $vmstate); + delete $conf->@{qw(vmstate runningmachine)}; + PVE::QemuConfig->write_config($vmid, $conf); + }; + warn $@ if $@; + die $err; + } + + die "lock changed unexpectedly\n" + if !PVE::QemuConfig->has_lock($conf, 'suspending'); + + vm_qmp_command($vmid, { execute => "quit" }); + $conf->{lock} = 'suspended'; + PVE::QemuConfig->write_config($vmid, $conf); + }); + } } sub vm_resume { my ($vmid, $skiplock, $nocheck) = @_; PVE::QemuConfig->lock_config($vmid, sub { - - my $res = vm_mon_cmd($vmid, 'query-status'); + my $vm_mon_cmd = $nocheck ? \&vm_mon_cmd_nocheck : \&vm_mon_cmd; + my $res = $vm_mon_cmd->($vmid, 'query-status'); my $resume_cmd = 'cont'; if ($res->{status} && $res->{status} eq 'suspended') { @@ -5558,12 +5780,9 @@ sub vm_resume { PVE::QemuConfig->check_lock($conf) if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup')); - - vm_mon_cmd($vmid, $resume_cmd); - - } else { - vm_mon_cmd_nocheck($vmid, $resume_cmd); } + + $vm_mon_cmd->($vmid, $resume_cmd); }); } @@ -5682,7 +5901,6 @@ sub restore_update_config_line { return if $line =~ m/^lock:/; return if $line =~ m/^unused\d+:/; return if $line =~ m/^parent:/; - return if $line =~ m/^template:/; # restored VM is never a template my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg'); if (($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/)) { @@ -6043,6 +6261,24 @@ sub restore_vma_archive { $storage_limits{$storeid} = $bwlimit; $virtdev_hash->{$virtdev} = $devinfo->{$devname}; + } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) { + my $virtdev = $1; + my $drive = parse_drive($virtdev, $2); + if (drive_is_cloudinit($drive)) { + my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file}); + my $scfg = PVE::Storage::storage_config($cfg, $storeid); + my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback + + my $d = { + format => $format, + storeid => $opts->{storage} // $storeid, + size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE, + file => $drive->{file}, # to make drive_is_cloudinit check possible + name => "vm-$vmid-cloudinit", + is_cloudinit => 1, + }; + $virtdev_hash->{$virtdev} = $d; + } } } @@ -6064,10 +6300,9 @@ sub restore_vma_archive { foreach_drive($oldconf, sub { my ($ds, $drive) = @_; - return if drive_is_cdrom($drive); + return if !$drive->{is_cloudinit} && drive_is_cdrom($drive); my $volid = $drive->{file}; - return if !$volid || $volid =~ m|^/|; my ($path, $owner) = PVE::Storage::path($cfg, $volid); @@ -6083,8 +6318,7 @@ sub restore_vma_archive { } }); - # delete vmstate files - # since after the restore we have no snapshots anymore + # delete vmstate files, after the restore we have no snapshots anymore foreach my $snapname (keys %{$oldconf->{snapshots}}) { my $snap = $oldconf->{snapshots}->{$snapname}; if ($snap->{vmstate}) { @@ -6113,22 +6347,30 @@ sub restore_vma_archive { my $supported = grep { $_ eq $d->{format} } @$validFormats; $d->{format} = $defFormat if !$supported; - my $volid = PVE::Storage::vdisk_alloc($cfg, $storeid, $vmid, - $d->{format}, undef, $alloc_size); + my $name; + if ($d->{is_cloudinit}) { + $name = $d->{name}; + $name .= ".$d->{format}" if $d->{format} ne 'raw'; + } + + my $volid = PVE::Storage::vdisk_alloc($cfg, $storeid, $vmid, $d->{format}, $name, $alloc_size); print STDERR "new volume ID is '$volid'\n"; $d->{volid} = $volid; - my $path = PVE::Storage::path($cfg, $volid); - PVE::Storage::activate_volumes($cfg,[$volid]); + PVE::Storage::activate_volumes($cfg, [$volid]); my $write_zeros = 1; if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) { $write_zeros = 0; } - print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n"; + if (!$d->{is_cloudinit}) { + my $path = PVE::Storage::path($cfg, $volid); + + print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n"; - print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n"; + print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n"; + } $map->{$virtdev} = $volid; } @@ -6364,9 +6606,9 @@ sub do_snapshots_with_qemu { my ($storecfg, $volid) = @_; my $storage_name = PVE::Storage::parse_volume_id($volid); + my $scfg = $storecfg->{ids}->{$storage_name}; - if ($qemu_snap_storage->{$storecfg->{ids}->{$storage_name}->{type}} - && !$storecfg->{ids}->{$storage_name}->{krbd}){ + if ($qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd}){ return 1; } @@ -6409,6 +6651,23 @@ sub template_create { }); } +sub convert_iscsi_path { + my ($path) = @_; + + if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) { + my $portal = $1; + my $target = $2; + my $lun = $3; + + my $initiator_name = get_initiator_name(); + + return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,". + "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw"; + } + + die "cannot convert iscsi path '$path', unkown format\n"; +} + sub qemu_img_convert { my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_; @@ -6429,13 +6688,32 @@ sub qemu_img_convert { my $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname); my $dst_path = PVE::Storage::path($storecfg, $dst_volid); + my $src_is_iscsi = ($src_path =~ m|^iscsi://|); + my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|); + my $cmd = []; push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n'; push @$cmd, '-l', "snapshot.name=$snapname" if($snapname && $src_format eq "qcow2"); push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool'; push @$cmd, '-T', 'none' if $src_scfg->{type} eq 'zfspool'; - push @$cmd, '-f', $src_format, '-O', $dst_format, $src_path; - if ($is_zero_initialized) { + + if ($src_is_iscsi) { + push @$cmd, '--image-opts'; + $src_path = convert_iscsi_path($src_path); + } else { + push @$cmd, '-f', $src_format; + } + + if ($dst_is_iscsi) { + push @$cmd, '--target-image-opts'; + $dst_path = convert_iscsi_path($dst_path); + } else { + push @$cmd, '-O', $dst_format; + } + + push @$cmd, $src_path; + + if (!$dst_is_iscsi && $is_zero_initialized) { push @$cmd, "zeroinit:$dst_path"; } else { push @$cmd, $dst_path; @@ -6470,7 +6748,7 @@ sub qemu_img_format { } sub qemu_drive_mirror { - my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete, $qga) = @_; + my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete, $qga, $bwlimit) = @_; $jobs = {} if !$jobs; @@ -6497,13 +6775,19 @@ sub qemu_drive_mirror { my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $qemu_target }; $opts->{format} = $format if $format; - print "drive mirror is starting for drive-$drive\n"; - - eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; #if a job already run for this device,it's throw an error + if (defined($bwlimit)) { + $opts->{speed} = $bwlimit * 1024; + print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n"; + } else { + print "drive mirror is starting for drive-$drive\n"; + } + # if a job already runs for this device we get an error, catch it for cleanup + eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; if (my $err = $@) { eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) }; - die "mirroring error: $err"; + warn "$@\n" if $@; + die "mirroring error: $err\n"; } qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete, $qga); @@ -6641,7 +6925,7 @@ sub qemu_blockjobs_cancel { sub clone_disk { my ($storecfg, $vmid, $running, $drivename, $drive, $snapname, - $newvmid, $storage, $format, $full, $newvollist, $jobs, $skipcomplete, $qga) = @_; + $newvmid, $storage, $format, $full, $newvollist, $jobs, $skipcomplete, $qga, $bwlimit) = @_; my $newvolid; @@ -6661,11 +6945,10 @@ sub clone_disk { my $name = undef; if (drive_is_cloudinit($drive)) { $name = "vm-$newvmid-cloudinit"; - # cloudinit only supports raw and qcow2 atm: - if ($dst_format eq 'qcow2') { - $name .= '.qcow2'; - } elsif ($dst_format ne 'raw') { - die "clone: unhandled format for cloudinit image\n"; + $snapname = undef; + # we only get here if it's supported by QEMU_FORMAT_RE, so just accept + if ($dst_format ne 'raw') { + $name .= ".$dst_format"; } } $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)); @@ -6675,6 +6958,7 @@ sub clone_disk { my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid); if (!$running || $snapname) { + # TODO: handle bwlimits qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit); } else { @@ -6684,7 +6968,7 @@ sub clone_disk { if $drive->{iothread}; } - qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs, $skipcomplete, $qga); + qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs, $skipcomplete, $qga, $bwlimit); } } @@ -6830,12 +7114,15 @@ sub scsihw_infos { } sub add_hyperv_enlightenments { - my ($cpuFlags, $winversion, $machine_type, $kvmver, $bios, $gpu_passthrough) = @_; + my ($cpuFlags, $winversion, $machine_type, $kvmver, $bios, $gpu_passthrough, $hv_vendor_id) = @_; return if $winversion < 6; return if $bios && $bios eq 'ovmf' && $winversion < 8; - push @$cpuFlags , 'hv_vendor_id=proxmox' if $gpu_passthrough; + if ($gpu_passthrough || defined($hv_vendor_id)) { + $hv_vendor_id //= 'proxmox'; + push @$cpuFlags , "hv_vendor_id=$hv_vendor_id"; + } if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { push @$cpuFlags , 'hv_spinlocks=0x1fff';