X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=7456158fda31c3b5e0991e3e55193bfb8ab27ac5;hb=ba9e10002f92ba31d157ddc2cf2c38a0472d725b;hp=3044b7f3ff62018a69ca4bccbfedffdd2e5b1876;hpb=f34ebd5259c4872a54e3d4362fd8b197819a1406;p=qemu-server.git diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 3044b7f..e47ed05 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -31,6 +31,8 @@ use PVE::QMPClient; use PVE::RPCEnvironment; use Time::HiRes qw(gettimeofday); +my $qemu_snap_storage = {rbd => 1, sheepdog => 1}; + my $cpuinfo = PVE::ProcFSTools::read_cpuinfo(); # Note about locking: we use flock on the config file protect @@ -65,76 +67,12 @@ PVE::JSONSchema::register_standard_option('pve-snapshot-name', { #no warnings 'redefine'; -unless(defined(&_VZSYSCALLS_H_)) { - eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_); - require 'sys/syscall.ph'; - if(defined(&__x86_64__)) { - eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus); - eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod); - eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod); - eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt); - eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr); - eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate); - eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid); - eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit); - } - elsif(defined( &__i386__) ) { - eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod); - eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod); - eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt); - eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr); - eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate); - eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus); - eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid); - eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit); - } else { - die("no fairsched syscall for this arch"); - } - require 'asm/ioctl.ph'; - eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x);}' unless defined(&KVM_GET_API_VERSION); -} - -sub fairsched_mknod { - my ($parent, $weight, $desired) = @_; - - return syscall(&__NR_fairsched_mknod, int($parent), int($weight), int($desired)); -} - -sub fairsched_rmnod { - my ($id) = @_; - - return syscall(&__NR_fairsched_rmnod, int($id)); -} - -sub fairsched_mvpr { - my ($pid, $newid) = @_; - - return syscall(&__NR_fairsched_mvpr, int($pid), int($newid)); -} - -sub fairsched_vcpus { - my ($id, $vcpus) = @_; - - return syscall(&__NR_fairsched_vcpus, int($id), int($vcpus)); -} - -sub fairsched_rate { - my ($id, $op, $rate) = @_; - - return syscall(&__NR_fairsched_rate, int($id), int($op), int($rate)); -} +sub cgroups_write { + my ($controller, $vmid, $option, $value) = @_; -use constant FAIRSCHED_SET_RATE => 0; -use constant FAIRSCHED_DROP_RATE => 1; -use constant FAIRSCHED_GET_RATE => 2; + my $path = "/sys/fs/cgroup/$controller/qemu.slice/$vmid.scope/$option"; + PVE::ProcFSTools::write_proc_entry($path, $value); -sub fairsched_cpulimit { - my ($id, $limit) = @_; - - my $cpulim1024 = int($limit * 1024 / 100); - my $op = $cpulim1024 ? FAIRSCHED_SET_RATE : FAIRSCHED_DROP_RATE; - - return fairsched_rate($id, $op, $cpulim1024); } my $nodename = PVE::INotify::nodename(); @@ -152,12 +90,6 @@ mkdir $lock_dir; my $pcisysfs = "/sys/bus/pci"; my $confdesc = { - iothread => { - optional => 1, - type => 'boolean', - description => "Enable iothread dataplane.", - default => 0, - }, onboot => { optional => 1, type => 'boolean', @@ -172,9 +104,9 @@ my $confdesc = { }, hotplug => { optional => 1, - type => 'boolean', - description => "Allow hotplug for disk and network device", - default => 0, + type => 'string', format => 'pve-hotplug-features', + description => "Selectively enable hotplug features. This is a comma separated list of hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable hotplug completely. Value '1' is an alias for the default 'network,disk,usb'.", + default => 'network,disk,usb', }, reboot => { optional => 1, @@ -190,9 +122,10 @@ my $confdesc = { }, cpulimit => { optional => 1, - type => 'integer', - description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.", + type => 'number', + description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.", minimum => 0, + maximum => 128, default => 0, }, cpuunits => { @@ -240,7 +173,7 @@ my $confdesc = { optional => 1, type => 'string', description => "scsi controller model", - enum => [qw(lsi lsi53c810 virtio-scsi-pci megasas pvscsi)], + enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)], default => 'lsi', }, description => { @@ -306,12 +239,18 @@ EODESC minimum => 1, default => 1, }, - maxcpus => { + numa => { + optional => 1, + type => 'boolean', + description => "Enable/disable Numa.", + default => 0, + }, + vcpus => { optional => 1, type => 'integer', - description => "Maximum cpus for hotplug.", + description => "Number of hotplugged vcpus.", minimum => 1, - default => 1, + default => 0, }, acpi => { optional => 1, @@ -367,12 +306,7 @@ EODESC pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)', default => 'now', }, - startup => { - optional => 1, - type => 'string', format => 'pve-qm-startup', - typetext => '[[order=]\d+] [,up=\d+] [,down=\d+] ', - description => "Startup and shutdown behavior. Order is a non-negative number defining the general startup order. Shutdown in done with reverse ordering. Additionally you can set the 'up' or 'down' delay in seconds, which specifies a delay to wait before the next VM is started or stopped.", - }, + startup => get_standard_option('pve-startup-order'), template => { optional => 1, type => 'boolean', @@ -418,7 +352,7 @@ EODESCR optional => 1, description => "Emulated CPU type.", type => 'string', - enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom Conroe Penryn Nehalem Westmere SandyBridge Haswell Broadwell Opteron_G1 Opteron_G2 Opteron_G3 Opteron_G4 Opteron_G5 host) ], + enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom Conroe Penryn Nehalem Westmere SandyBridge IvyBridge Haswell Broadwell Opteron_G1 Opteron_G2 Opteron_G3 Opteron_G4 Opteron_G5 host) ], default => 'kvm64', }, parent => get_standard_option('pve-snapshot-name', { @@ -450,6 +384,12 @@ EODESCR maxLength => 256, optional => 1, }, + protection => { + optional => 1, + type => 'boolean', + description => "Sets the protection flag of the VM. This will prevent the remove operation.", + default => 0, + }, }; # what about other qemu settings ? @@ -483,15 +423,31 @@ my $MAX_UNUSED_DISKS = 8; my $MAX_HOSTPCI_DEVICES = 4; my $MAX_SERIAL_PORTS = 4; my $MAX_PARALLEL_PORTS = 3; +my $MAX_NUMA = 8; +my $MAX_MEM = 4194304; +my $STATICMEM = 1024; + +my $numadesc = { + optional => 1, + type => 'string', format => 'pve-qm-numanode', + typetext => "cpus=[[,hostnodes=] [,policy=]]", + description => "numa topology", +}; +PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc); + +for (my $i = 0; $i < $MAX_NUMA; $i++) { + $confdesc->{"numa$i"} = $numadesc; +} my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000', 'pcnet', 'virtio', - 'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3']; + 'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3', + 'e1000-82540em', 'e1000-82544gc', 'e1000-82545em']; my $nic_model_list_txt = join(' ', sort @$nic_model_list); my $netdesc = { optional => 1, type => 'string', format => 'pve-qm-net', - typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=][,queues=][,rate=][,tag=][,firewall=0|1]", + typetext => "MODEL=XX:XX:XX:XX:XX:XX [,bridge=][,queues=][,rate=] [,tag=][,firewall=0|1],link_down=0|1]", description => < 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,serial=serial][,model=model]', description => "Use volume as IDE hard disk or CD-ROM (n is 0 to " .($MAX_IDE_DISKS -1) . ").", }; PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc); @@ -533,7 +489,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc); my $scsidesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,iothread=on] [,queues=] [,serial=serial]', description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").", }; PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); @@ -541,7 +497,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); my $satadesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,serial=serial]', description => "Use volume as SATA hard disk or CD-ROM (n is 0 to " . ($MAX_SATA_DISKS - 1). ").", }; PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc); @@ -549,7 +505,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc); my $virtiodesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,iothread=on] [,serial=serial]', description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").", }; PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc); @@ -578,7 +534,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc); my $hostpcidesc = { optional => 1, type => 'string', format => 'pve-qm-hostpci', - typetext => "[host=]HOSTPCIDEVICE [,driver=kvm|vfio] [,rombar=on|off] [,pcie=0|1] [,x-vga=on|off]", + typetext => "[host=]HOSTPCIDEVICE [,rombar=on|off] [,pcie=0|1] [,x-vga=on|off]", description => </dev/null`; - if ($tmp =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)[,\s]/) { + if ($tmp =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) { $kvm_user_version = $2; } @@ -851,6 +807,36 @@ sub create_conf_nolock { PVE::Tools::file_set_contents($filename, $data); } +sub parse_hotplug_features { + my ($data) = @_; + + my $res = {}; + + return $res if $data eq '0'; + + $data = $confdesc->{hotplug}->{default} if $data eq '1'; + + foreach my $feature (PVE::Tools::split_list($data)) { + if ($feature =~ m/^(network|disk|cpu|memory|usb)$/) { + $res->{$1} = 1; + } else { + warn "ignoring unknown hotplug feature '$feature'\n"; + } + } + return $res; +} + +PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features); +sub pve_verify_hotplug_features { + my ($value, $noerr) = @_; + + return $value if parse_hotplug_features($value); + + return undef if $noerr; + + die "unable to parse hotplug option\n"; +} + my $parse_size = sub { my ($value) = @_; @@ -888,7 +874,8 @@ my $format_size = sub { # ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]] # [,snapshot=on|off][,cache=on|off][,format=f][,backup=yes|no] # [,rerror=ignore|report|stop][,werror=enospc|ignore|report|stop] -# [,aio=native|threads][,discard=ignore|on] +# [,aio=native|threads][,discard=ignore|on][,iothread=on] +# [,serial=serial][,model=model] sub parse_drive { my ($key, $data) = @_; @@ -909,7 +896,7 @@ sub parse_drive { foreach my $p (split (/,/, $data)) { next if $p =~ m/^\s*$/; - if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio|bps|mbps|mbps_max|bps_rd|mbps_rd|mbps_rd_max|bps_wr|mbps_wr|mbps_wr_max|iops|iops_max|iops_rd|iops_rd_max|iops_wr|iops_wr_max|size|discard)=(.+)$/) { + if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio|bps|mbps|mbps_max|bps_rd|mbps_rd|mbps_rd_max|bps_wr|mbps_wr|mbps_wr_max|iops|iops_max|iops_rd|iops_rd_max|iops_wr|iops_wr_max|size|discard|iothread|queues|serial|model)=(.+)$/) { my ($k, $v) = ($1, $2); $k = 'file' if $k eq 'volume'; @@ -933,10 +920,6 @@ sub parse_drive { return undef if !$res->{file}; - if($res->{file} =~ m/\.(raw|cow|qcow|qcow2|vmdk|cloop)$/){ - $res->{format} = $1; - } - return undef if $res->{cache} && $res->{cache} !~ m/^(off|none|writethrough|writeback|unsafe|directsync)$/; return undef if $res->{snapshot} && $res->{snapshot} !~ m/^(on|off)$/; @@ -945,12 +928,14 @@ sub parse_drive { return undef if $res->{secs} && $res->{secs} !~ m/^\d+$/; return undef if $res->{media} && $res->{media} !~ m/^(disk|cdrom)$/; return undef if $res->{trans} && $res->{trans} !~ m/^(none|lba|auto)$/; - return undef if $res->{format} && $res->{format} !~ m/^(raw|cow|qcow|qcow2|vmdk|cloop)$/; + return undef if $res->{format} && $res->{format} !~ m/^(raw|cow|qcow|qed|qcow2|vmdk|cloop)$/; return undef if $res->{rerror} && $res->{rerror} !~ m/^(ignore|report|stop)$/; return undef if $res->{werror} && $res->{werror} !~ m/^(enospc|ignore|report|stop)$/; return undef if $res->{backup} && $res->{backup} !~ m/^(yes|no)$/; return undef if $res->{aio} && $res->{aio} !~ m/^(native|threads)$/; return undef if $res->{discard} && $res->{discard} !~ m/^(ignore|on)$/; + return undef if $res->{iothread} && $res->{iothread} !~ m/^(on)$/; + return undef if $res->{queues} && ($res->{queues} !~ m/^\d+$/ || $res->{queues} < 2); return undef if $res->{mbps_rd} && $res->{mbps}; return undef if $res->{mbps_wr} && $res->{mbps}; @@ -973,7 +958,6 @@ sub parse_drive { return undef if $res->{iops_wr} && $res->{iops_wr} !~ m/^\d+$/; return undef if $res->{iops_wr_max} && $res->{iops_wr_max} !~ m/^\d+$/; - if ($res->{size}) { return undef if !defined($res->{size} = &$parse_size($res->{size})); } @@ -992,13 +976,13 @@ sub parse_drive { return $res; } -my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio discard iops iops_rd iops_wr iops_max iops_rd_max iops_wr_max); +my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio discard iops iops_rd iops_wr iops_max iops_rd_max iops_wr_max serial); sub print_drive { my ($vmid, $drive) = @_; my $opts = ''; - foreach my $o (@qemu_drive_options, 'mbps', 'mbps_rd', 'mbps_wr', 'mbps_max', 'mbps_rd_max', 'mbps_wr_max', 'backup') { + foreach my $o (@qemu_drive_options, 'mbps', 'mbps_rd', 'mbps_wr', 'mbps_max', 'mbps_rd_max', 'mbps_wr_max', 'backup', 'iothread', 'queues') { $opts .= ",$o=$drive->{$o}" if $drive->{$o}; } @@ -1006,6 +990,10 @@ sub print_drive { $opts .= ",size=" . &$format_size($drive->{size}); } + if (my $model = $drive->{model}) { + $opts .= ",model=$model"; + } + return "$drive->{file}$opts"; } @@ -1096,10 +1084,10 @@ sub print_drivedevice_full { if ($drive->{interface} eq 'virtio') { my $pciaddr = print_pci_addr("$drive->{interface}$drive->{index}", $bridges); $device = "virtio-blk-pci,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}$pciaddr"; - $device .= ",iothread=iothread0" if $conf->{iothread}; + $device .= ",iothread=iothread-$drive->{interface}$drive->{index}" if $drive->{iothread}; } elsif ($drive->{interface} eq 'scsi') { - $maxdev = ($conf->{scsihw} && ($conf->{scsihw} !~ m/^lsi/)) ? 256 : 7; - my $controller = int($drive->{index} / $maxdev); + + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); my $unit = $drive->{index} % $maxdev; my $devicetype = 'hd'; my $path = ''; @@ -1126,9 +1114,9 @@ sub print_drivedevice_full { } if (!$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/)){ - $device = "scsi-$devicetype,bus=scsihw$controller.0,scsi-id=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; + $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; } else { - $device = "scsi-$devicetype,bus=scsihw$controller.0,channel=0,scsi-id=0,lun=$drive->{index},drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; + $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0,lun=$drive->{index},drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; } } elsif ($drive->{interface} eq 'ide'){ @@ -1138,6 +1126,9 @@ sub print_drivedevice_full { my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd"; $device = "ide-$devicetype,bus=ide.$controller,unit=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; + if ($devicetype eq 'hd' && (my $model = $drive->{model})) { + $device .= ",model=$model"; + } } elsif ($drive->{interface} eq 'sata'){ my $controller = int($drive->{index} / $MAX_SATA_DISKS); my $unit = $drive->{index} % $MAX_SATA_DISKS; @@ -1171,34 +1162,54 @@ sub get_initiator_name { sub print_drive_full { my ($storecfg, $vmid, $drive) = @_; + my $path; + my $volid = $drive->{file}; + my $format; + + if (drive_is_cdrom($drive)) { + $path = get_iso_path($storecfg, $vmid, $volid); + } else { + my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1); + if ($storeid) { + $path = PVE::Storage::path($storecfg, $volid); + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + $format = qemu_img_format($scfg, $volname); + } else { + $path = $volid; + } + } + my $opts = ''; foreach my $o (@qemu_drive_options) { next if $o eq 'bootindex'; $opts .= ",$o=$drive->{$o}" if $drive->{$o}; } + $opts .= ",format=$format" if $format && !$drive->{format}; + foreach my $o (qw(bps bps_rd bps_wr)) { my $v = $drive->{"m$o"}; $opts .= ",$o=" . int($v*1024*1024) if $v; } - # use linux-aio by default (qemu default is threads) - $opts .= ",aio=native" if !$drive->{aio}; + my $cache_direct = 0; - my $path; - my $volid = $drive->{file}; - if (drive_is_cdrom($drive)) { - $path = get_iso_path($storecfg, $vmid, $volid); - } else { - if ($volid =~ m|^/|) { - $path = $volid; + if (my $cache = $drive->{cache}) { + $cache_direct = $cache =~ /^(?:off|none|directsync)$/; + } elsif (!drive_is_cdrom($drive)) { + $opts .= ",cache=none"; + $cache_direct = 1; + } + + # aio native works only with O_DIRECT + if (!$drive->{aio}) { + if($cache_direct) { + $opts .= ",aio=native"; } else { - $path = PVE::Storage::path($storecfg, $volid); + $opts .= ",aio=threads"; } } - $opts .= ",cache=none" if !$drive->{cache} && !drive_is_cdrom($drive); - my $detectzeroes = $drive->{discard} ? "unmap" : "on"; $opts .= ",detect-zeroes=$detectzeroes" if !drive_is_cdrom($drive); @@ -1208,7 +1219,7 @@ sub print_drive_full { } sub print_netdevice_full { - my ($vmid, $conf, $net, $netid, $bridges) = @_; + my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files) = @_; my $bootorder = $conf->{boot} || $confdesc->{boot}->{default}; @@ -1217,17 +1228,31 @@ sub print_netdevice_full { $device = 'virtio-net-pci'; }; - # qemu > 0.15 always try to boot from network - we disable that by - # not loading the pxe rom file - my $extra = ($bootorder !~ m/n/) ? "romfile=," : ''; my $pciaddr = print_pci_addr("$netid", $bridges); - my $tmpstr = "$device,${extra}mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid"; + my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid"; if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio'){ #Consider we have N queues, the number of vectors needed is 2*N + 2 (plus one config interrupt and control vq) my $vectors = $net->{queues} * 2 + 2; $tmpstr .= ",vectors=$vectors,mq=on"; } $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ; + + if ($use_old_bios_files) { + my $romfile; + if ($device eq 'virtio-net-pci') { + $romfile = 'pxe-virtio.rom'; + } elsif ($device eq 'e1000') { + $romfile = 'pxe-e1000.rom'; + } elsif ($device eq 'ne2k') { + $romfile = 'pxe-ne2k_pci.rom'; + } elsif ($device eq 'pcnet') { + $romfile = 'pxe-pcnet.rom'; + } elsif ($device eq 'rtl8139') { + $romfile = 'pxe-rtl8139.rom'; + } + $tmpstr .= ",romfile=$romfile" if $romfile; + } + return $tmpstr; } @@ -1272,6 +1297,31 @@ sub drive_is_cdrom { } +sub parse_numa { + my ($data) = @_; + + my $res = {}; + + foreach my $kvp (split(/,/, $data)) { + + if ($kvp =~ m/^memory=(\S+)$/) { + $res->{memory} = $1; + } elsif ($kvp =~ m/^policy=(preferred|bind|interleave)$/) { + $res->{policy} = $1; + } elsif ($kvp =~ m/^cpus=(\d+)(-(\d+))?$/) { + $res->{cpus}->{start} = $1; + $res->{cpus}->{end} = $3; + } elsif ($kvp =~ m/^hostnodes=(\d+)(-(\d+))?$/) { + $res->{hostnodes}->{start} = $1; + $res->{hostnodes}->{end} = $3; + } else { + return undef; + } + } + + return $res; +} + sub parse_hostpci { my ($value) = @_; @@ -1293,8 +1343,6 @@ sub parse_hostpci { my $pcidevices = lspci($2); $res->{pciid} = $pcidevices->{$2}; } - } elsif ($kv =~ m/^driver=(kvm|vfio)$/) { - $res->{driver} = $1; } elsif ($kv =~ m/^rombar=(on|off)$/) { $res->{rombar} = $1; } elsif ($kv =~ m/^x-vga=(on|off)$/) { @@ -1319,7 +1367,7 @@ sub parse_net { foreach my $kvp (split(/,/, $data)) { - if ($kvp =~ m/^(ne2k_pci|e1000|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er|vmxnet3)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) { + if ($kvp =~ m/^(ne2k_pci|e1000|e1000-82540em|e1000-82544gc|e1000-82545em|rtl8139|pcnet|virtio|ne2k_isa|i82551|i82557b|i82559er|vmxnet3)(=([0-9a-f]{2}(:[0-9a-f]{2}){5}))?$/i) { my $model = lc($1); my $mac = defined($3) ? uc($3) : PVE::Tools::random_ether_addr(); $res->{model} = $model; @@ -1332,8 +1380,10 @@ sub parse_net { $res->{rate} = $1; } elsif ($kvp =~ m/^tag=(\d+)$/) { $res->{tag} = $1; - } elsif ($kvp =~ m/^firewall=(\d+)$/) { + } elsif ($kvp =~ m/^firewall=([01])$/) { $res->{firewall} = $1; + } elsif ($kvp =~ m/^link_down=([01])$/) { + $res->{link_down} = $1; } else { return undef; } @@ -1353,7 +1403,9 @@ sub print_net { $res .= ",bridge=$net->{bridge}" if $net->{bridge}; $res .= ",rate=$net->{rate}" if $net->{rate}; $res .= ",tag=$net->{tag}" if $net->{tag}; - $res .= ",firewall=$net->{firewall}" if $net->{firewall}; + $res .= ",firewall=1" if $net->{firewall}; + $res .= ",link_down=1" if $net->{link_down}; + $res .= ",queues=$net->{queues}" if $net->{queues}; return $res; } @@ -1389,6 +1441,96 @@ sub add_unused_volume { return $key; } +sub vm_is_volid_owner { + my ($storecfg, $vmid, $volid) = @_; + + if ($volid !~ m|^/|) { + my ($path, $owner); + eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); }; + if ($owner && ($owner == $vmid)) { + return 1; + } + } + + return undef; +} + +sub split_flagged_list { + my $text = shift || ''; + $text =~ s/[,;]/ /g; + $text =~ s/^\s+//; + return { map { /^(!?)(.*)$/ && ($2, $1) } ($text =~ /\S+/g) }; +} + +sub join_flagged_list { + my ($how, $lst) = @_; + join $how, map { $lst->{$_} . $_ } keys %$lst; +} + +sub vmconfig_delete_pending_option { + my ($conf, $key, $force) = @_; + + delete $conf->{pending}->{$key}; + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + $pending_delete_hash->{$key} = $force ? '!' : ''; + $conf->{pending}->{delete} = join_flagged_list(',', $pending_delete_hash); +} + +sub vmconfig_undelete_pending_option { + my ($conf, $key) = @_; + + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + delete $pending_delete_hash->{$key}; + + if (%$pending_delete_hash) { + $conf->{pending}->{delete} = join_flagged_list(',', $pending_delete_hash); + } else { + delete $conf->{pending}->{delete}; + } +} + +sub vmconfig_register_unused_drive { + my ($storecfg, $vmid, $conf, $drive) = @_; + + if (!drive_is_cdrom($drive)) { + my $volid = $drive->{file}; + if (vm_is_volid_owner($storecfg, $vmid, $volid)) { + add_unused_volume($conf, $volid, $vmid); + } + } +} + +sub vmconfig_cleanup_pending { + my ($conf) = @_; + + # remove pending changes when nothing changed + my $changes; + foreach my $opt (keys %{$conf->{pending}}) { + if (defined($conf->{$opt}) && ($conf->{pending}->{$opt} eq $conf->{$opt})) { + $changes = 1; + delete $conf->{pending}->{$opt}; + } + } + + my $current_delete_hash = split_flagged_list($conf->{pending}->{delete}); + my $pending_delete_hash = {}; + while (my ($opt, $force) = each %$current_delete_hash) { + if (defined($conf->{$opt})) { + $pending_delete_hash->{$opt} = $force; + } else { + $changes = 1; + } + } + + if (%$pending_delete_hash) { + $conf->{pending}->{delete} = join_flagged_list(',', $pending_delete_hash); + } else { + delete $conf->{pending}->{delete}; + } + + return $changes; +} + my $valid_smbios1_options = { manufacturer => '\S+', product => '\S+', @@ -1452,6 +1594,17 @@ sub verify_bootdisk { die "invalid boot disk '$value'\n"; } +PVE::JSONSchema::register_format('pve-qm-numanode', \&verify_numa); +sub verify_numa { + my ($value, $noerr) = @_; + + return $value if parse_numa($value); + + return undef if $noerr; + + die "unable to parse numa options\n"; +} + PVE::JSONSchema::register_format('pve-qm-net', \&verify_net); sub verify_net { my ($value, $noerr) = @_; @@ -1518,41 +1671,6 @@ sub parse_watchdog { return $res; } -PVE::JSONSchema::register_format('pve-qm-startup', \&verify_startup); -sub verify_startup { - my ($value, $noerr) = @_; - - return $value if parse_startup($value); - - return undef if $noerr; - - die "unable to parse startup options\n"; -} - -sub parse_startup { - my ($value) = @_; - - return undef if !$value; - - my $res = {}; - - foreach my $p (split(/,/, $value)) { - next if $p =~ m/^\s*$/; - - if ($p =~ m/^(order=)?(\d+)$/) { - $res->{order} = $2; - } elsif ($p =~ m/^up=(\d+)$/) { - $res->{up} = $1; - } elsif ($p =~ m/^down=(\d+)$/) { - $res->{down} = $1; - } else { - return undef; - } - } - - return $res; -} - sub parse_usb_device { my ($value) = @_; @@ -1782,6 +1900,7 @@ sub parse_vm_config { my $res = { digest => Digest::SHA::sha1_hex($raw), snapshots => {}, + pending => {}, }; $filename =~ m|/qemu-server/(\d+)\.conf$| @@ -1790,26 +1909,42 @@ sub parse_vm_config { my $vmid = $1; my $conf = $res; - my $descr = ''; + my $descr; + my $section = ''; my @lines = split(/\n/, $raw); foreach my $line (@lines) { next if $line =~ m/^\s*$/; - if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) { - my $snapname = $1; - $conf->{description} = $descr if $descr; - $descr = ''; - $conf = $res->{snapshots}->{$snapname} = {}; + if ($line =~ m/^\[PENDING\]\s*$/i) { + $section = 'pending'; + if (defined($descr)) { + $descr =~ s/\s+$//; + $conf->{description} = $descr; + } + $descr = undef; + $conf = $res->{$section} = {}; + next; + + } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) { + $section = $1; + if (defined($descr)) { + $descr =~ s/\s+$//; + $conf->{description} = $descr; + } + $descr = undef; + $conf = $res->{snapshots}->{$section} = {}; next; } if ($line =~ m/^\#(.*)\s*$/) { + $descr = '' if !defined($descr); $descr .= PVE::Tools::decode_text($1) . "\n"; next; } if ($line =~ m/^(description):\s*(.*\S)\s*$/) { + $descr = '' if !defined($descr); $descr .= PVE::Tools::decode_text($2); } elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) { $conf->{snapstate} = $1; @@ -1817,6 +1952,13 @@ sub parse_vm_config { my $key = $1; my $value = $2; $conf->{$key} = $value; + } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) { + my $value = $1; + if ($section eq 'pending') { + $conf->{delete} = $value; # we parse this later + } else { + warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n"; + } } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) { my $key = $1; my $value = $2; @@ -1845,8 +1987,10 @@ sub parse_vm_config { } } - $conf->{description} = $descr if $descr; - + if (defined($descr)) { + $descr =~ s/\s+$//; + $conf->{description} = $descr; + } delete $res->{snapstate}; # just to be sure return $res; @@ -1872,19 +2016,21 @@ sub write_vm_config { delete $conf->{smp}; } - if ($conf->{maxcpus} && $conf->{sockets}) { - delete $conf->{sockets}; - } - my $used_volids = {}; my $cleanup_config = sub { - my ($cref, $snapname) = @_; + my ($cref, $pending, $snapname) = @_; foreach my $key (keys %$cref) { next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' || - $key eq 'snapstate'; + $key eq 'snapstate' || $key eq 'pending'; my $value = $cref->{$key}; + if ($key eq 'delete') { + die "propertry 'delete' is only allowed in [PENDING]\n" + if !$pending; + # fixme: check syntax? + next; + } eval { $value = check_type($key, $value); }; die "unable to parse value of '$key' - $@" if $@; @@ -1898,8 +2044,12 @@ sub write_vm_config { }; &$cleanup_config($conf); + + &$cleanup_config($conf->{pending}, 1); + foreach my $snapname (keys %{$conf->{snapshots}}) { - &$cleanup_config($conf->{snapshots}->{$snapname}, $snapname); + die "internal error" if $snapname eq 'pending'; + &$cleanup_config($conf->{snapshots}->{$snapname}, undef, $snapname); } # remove 'unusedX' settings if we re-add a volume @@ -1911,24 +2061,35 @@ sub write_vm_config { } my $generate_raw_config = sub { - my ($conf) = @_; + my ($conf, $pending) = @_; my $raw = ''; # add description as comment to top of file - my $descr = $conf->{description} || ''; - foreach my $cl (split(/\n/, $descr)) { - $raw .= '#' . PVE::Tools::encode_text($cl) . "\n"; + if (defined(my $descr = $conf->{description})) { + if ($descr) { + foreach my $cl (split(/\n/, $descr)) { + $raw .= '#' . PVE::Tools::encode_text($cl) . "\n"; + } + } else { + $raw .= "#\n" if $pending; + } } foreach my $key (sort keys %$conf) { - next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots'; + next if $key eq 'digest' || $key eq 'description' || $key eq 'pending' || $key eq 'snapshots'; $raw .= "$key: $conf->{$key}\n"; } return $raw; }; my $raw = &$generate_raw_config($conf); + + if (scalar(keys %{$conf->{pending}})){ + $raw .= "\n[PENDING]\n"; + $raw .= &$generate_raw_config($conf->{pending}, 1); + } + foreach my $snapname (sort keys %{$conf->{snapshots}}) { $raw .= "\n[$snapname]\n"; $raw .= &$generate_raw_config($conf->{snapshots}->{$snapname}); @@ -2200,6 +2361,7 @@ sub vmstatus { $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1); $d->{cpus} = $cpucount if $d->{cpus} > $cpucount; + $d->{cpus} = $conf->{vcpus} if $conf->{vcpus}; $d->{name} = $conf->{name} || "VM $vmid"; $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0; @@ -2233,6 +2395,12 @@ sub vmstatus { $d->{netout} += $netdev->{$dev}->{receive}; $d->{netin} += $netdev->{$dev}->{transmit}; + + if ($full) { + $d->{nics}->{$dev}->{netout} = $netdev->{$dev}->{receive}; + $d->{nics}->{$dev}->{netin} = $netdev->{$dev}->{transmit}; + } + } my $ctime = gettimeofday; @@ -2301,6 +2469,7 @@ sub vmstatus { $d->{freemem} = $info->{free_mem}; } + $d->{ballooninfo} = $info; }; my $blockstatscb = sub { @@ -2308,9 +2477,13 @@ sub vmstatus { my $data = $resp->{'return'} || []; my $totalrdbytes = 0; my $totalwrbytes = 0; + for my $blockstat (@$data) { $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes}; $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes}; + + $blockstat->{device} =~ s/drive-//; + $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats}; } $res->{$vmid}->{diskread} = $totalrdbytes; $res->{$vmid}->{diskwrite} = $totalwrbytes; @@ -2339,7 +2512,7 @@ sub vmstatus { $qmpclient->queue_cmd($vmid, $statuscb, 'query-status'); } - $qmpclient->queue_execute(); + $qmpclient->queue_execute(undef, 1); foreach my $vmid (keys %$list) { next if $opt_vmid && ($vmid ne $opt_vmid); @@ -2349,6 +2522,48 @@ sub vmstatus { return $res; } +sub foreach_dimm { + my ($conf, $vmid, $memory, $sockets, $func) = @_; + + my $dimm_id = 0; + my $current_size = 1024; + my $dimm_size = 512; + return if $current_size == $memory; + + for (my $j = 0; $j < 8; $j++) { + for (my $i = 0; $i < 32; $i++) { + my $name = "dimm${dimm_id}"; + $dimm_id++; + my $numanode = $i % $sockets; + $current_size += $dimm_size; + &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory); + return $current_size if $current_size >= $memory; + } + $dimm_size *= 2; + } +} + +sub foreach_reverse_dimm { + my ($conf, $vmid, $memory, $sockets, $func) = @_; + + my $dimm_id = 253; + my $current_size = 4177920; + my $dimm_size = 65536; + return if $current_size == $memory; + + for (my $j = 0; $j < 8; $j++) { + for (my $i = 0; $i < 32; $i++) { + my $name = "dimm${dimm_id}"; + $dimm_id--; + my $numanode = $i % $sockets; + $current_size -= $dimm_size; + &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory); + return $current_size if $current_size <= $memory; + } + $dimm_size /= 2; + } +} + sub foreach_drive { my ($conf, $func) = @_; @@ -2403,7 +2618,7 @@ sub vga_conf_has_spice { } sub config_to_command { - my ($storecfg, $vmid, $conf, $defaults, $forcemachine) = @_; + my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $use_old_bios_files) = @_; my $cmd = []; my $globalFlags = []; @@ -2426,6 +2641,21 @@ sub config_to_command { my $have_ovz = -f '/proc/vz/vestat'; my $q35 = machine_type_is_q35($conf); + my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); + my $machine_type = $forcemachine || $conf->{machine}; + + my $cpuunits = defined($conf->{cpuunits}) ? + $conf->{cpuunits} : $defaults->{cpuunits}; + + push @$cmd, '/usr/bin/systemd-run'; + push @$cmd, '--scope'; + push @$cmd, '--slice', "qemu"; + push @$cmd, '--unit', $vmid; + push @$cmd, '-p', "CPUShares=$cpuunits"; + if ($conf->{cpulimit}) { + my $cpulimit = int($conf->{cpulimit} * 100); + push @$cmd, '-p', "CPUQuota=$cpulimit\%"; + } push @$cmd, '/usr/bin/kvm'; @@ -2448,8 +2678,6 @@ sub config_to_command { push @$cmd, '-smbios', "type=1,$conf->{smbios1}"; } - push @$cmd, '-object', "iothread,id=iothread0" if $conf->{iothread}; - if ($q35) { # the q35 chipset support native usb2, so we enable usb controller # by default for this machine type @@ -2508,13 +2736,11 @@ sub config_to_command { } my $rombar = $d->{rombar} && $d->{rombar} eq 'off' ? ",rombar=0" : ""; - my $driver = $d->{driver} && $d->{driver} eq 'vfio' ? "vfio-pci" : "pci-assign"; my $xvga = $d->{'x-vga'} && $d->{'x-vga'} eq 'on' ? ",x-vga=on" : ""; if ($xvga && $xvga ne '') { push @$cpuFlags, 'kvm=off'; $vga = 'none'; } - $driver = "vfio-pci" if $xvga ne ''; my $pcidevices = $d->{pciid}; my $multifunction = 1 if @$pcidevices > 1; @@ -2525,7 +2751,7 @@ sub config_to_command { $id .= ".$j" if $multifunction; my $addr = $pciaddr; $addr .= ".$j" if $multifunction; - my $devicestr = "$driver,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr"; + my $devicestr = "vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr"; if($j == 0){ $devicestr .= "$rombar$xvga"; @@ -2586,19 +2812,17 @@ sub config_to_command { $sockets = $conf->{sockets} if $conf->{sockets}; my $cores = $conf->{cores} || 1; - my $maxcpus = $conf->{maxcpus} if $conf->{maxcpus}; - my $total_cores = $sockets * $cores; - my $allowed_cores = $cpuinfo->{cpus}; + my $maxcpus = $sockets * $cores; - die "MAX $allowed_cores cores allowed per VM on this node\n" - if ($allowed_cores < $total_cores); + my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus; - if ($maxcpus) { - push @$cmd, '-smp', "cpus=$cores,maxcpus=$maxcpus"; - } else { - push @$cmd, '-smp', "sockets=$sockets,cores=$cores"; - } + my $allowed_vcpus = $cpuinfo->{cpus}; + + die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" + if ($allowed_vcpus < $maxcpus); + + push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus"; push @$cmd, '-nodefaults'; @@ -2611,7 +2835,7 @@ sub config_to_command { $i++; } - push @$cmd, '-boot', "menu=on"; + push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000"; push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0; @@ -2641,8 +2865,14 @@ sub config_to_command { $ost eq 'wvista') { push @$globalFlags, 'kvm-pit.lost_tick_policy=discard'; push @$cmd, '-no-hpet'; - #push @$cpuFlags , 'hv_vapic" if !$nokvm; #fixme, my win2008R2 hang at boot with this - push @$cpuFlags , 'hv_spinlocks=0xffff' if !$nokvm; + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + push @$cpuFlags , 'hv_spinlocks=0x1fff' if !$nokvm; + push @$cpuFlags , 'hv_vapic' if !$nokvm; + push @$cpuFlags , 'hv_time' if !$nokvm; + + } else { + push @$cpuFlags , 'hv_spinlocks=0xffff' if !$nokvm; + } } if ($ost eq 'win7' || $ost eq 'win8') { @@ -2658,7 +2888,6 @@ sub config_to_command { die "No accelerator found!\n" if !$cpuinfo->{hvm}; } - my $machine_type = $forcemachine || $conf->{machine}; if ($machine_type) { push @$machineFlags, "type=${machine_type}"; } @@ -2674,31 +2903,140 @@ sub config_to_command { push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64'; - push @$cpuFlags , '+x2apic' if !$nokvm && $conf->{ostype} ne 'solaris'; - - push @$cpuFlags , '-x2apic' if $conf->{ostype} eq 'solaris'; + push @$cpuFlags , '-x2apic' + if $conf->{ostype} && $conf->{ostype} eq 'solaris'; push @$cpuFlags, '+sep' if $cpu eq 'kvm64' || $cpu eq 'kvm32'; + push @$cpuFlags, '-rdtscp' if $cpu =~ m/^Opteron/; + + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + + push @$cpuFlags , '+kvm_pv_unhalt' if !$nokvm; + push @$cpuFlags , '+kvm_pv_eoi' if !$nokvm; + push @$cpuFlags , '-kvm_steal_time' if !$nokvm; + } + + push @$cpuFlags, 'enforce' if $cpu ne 'host' && !$nokvm; + $cpu .= "," . join(',', @$cpuFlags) if scalar(@$cpuFlags); - # Note: enforce needs kernel 3.10, so we do not use it for now - # push @$cmd, '-cpu', "$cpu,enforce"; push @$cmd, '-cpu', $cpu; - push @$cmd, '-S' if $conf->{freeze}; + my $memory = $conf->{memory} || $defaults->{memory}; + my $static_memory = 0; + my $dimm_memory = 0; - # set keyboard layout - my $kb = $conf->{keyboard} || $defaults->{keyboard}; - push @$cmd, '-k', $kb if $kb; + if ($hotplug_features->{memory}) { + die "Numa need to be enabled for memory hotplug\n" if !$conf->{numa}; + die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM; + $static_memory = $STATICMEM; + die "minimum memory must be ${static_memory}MB\n" if($memory < $static_memory); + $dimm_memory = $memory - $static_memory; + push @$cmd, '-m', "size=${static_memory},slots=255,maxmem=${MAX_MEM}M"; - # enable sound - #my $soundhw = $conf->{soundhw} || $defaults->{soundhw}; - #push @$cmd, '-soundhw', 'es1370'; - #push @$cmd, '-soundhw', $soundhw if $soundhw; + } else { + + $static_memory = $memory; + push @$cmd, '-m', $static_memory; + } + + if ($conf->{numa}) { + + my $numa_totalmemory = undef; + for (my $i = 0; $i < $MAX_NUMA; $i++) { + next if !$conf->{"numa$i"}; + my $numa = parse_numa($conf->{"numa$i"}); + next if !$numa; + # memory + die "missing numa node$i memory value\n" if !$numa->{memory}; + my $numa_memory = $numa->{memory}; + $numa_totalmemory += $numa_memory; + my $numa_object = "memory-backend-ram,id=ram-node$i,size=${numa_memory}M"; + + # cpus + my $cpus_start = $numa->{cpus}->{start}; + die "missing numa node$i cpus\n" if !defined($cpus_start); + my $cpus_end = $numa->{cpus}->{end} if defined($numa->{cpus}->{end}); + my $cpus = $cpus_start; + if (defined($cpus_end)) { + $cpus .= "-$cpus_end"; + die "numa node$i : cpu range $cpus is incorrect\n" if $cpus_end <= $cpus_start; + } + + # hostnodes + my $hostnodes_start = $numa->{hostnodes}->{start}; + if (defined($hostnodes_start)) { + my $hostnodes_end = $numa->{hostnodes}->{end} if defined($numa->{hostnodes}->{end}); + my $hostnodes = $hostnodes_start; + if (defined($hostnodes_end)) { + $hostnodes .= "-$hostnodes_end"; + die "host node $hostnodes range is incorrect\n" if $hostnodes_end <= $hostnodes_start; + } + + my $hostnodes_end_range = defined($hostnodes_end) ? $hostnodes_end : $hostnodes_start; + for (my $i = $hostnodes_start; $i <= $hostnodes_end_range; $i++ ) { + die "host numa node$i don't exist\n" if ! -d "/sys/devices/system/node/node$i/"; + } + + # policy + my $policy = $numa->{policy}; + die "you need to define a policy for hostnode $hostnodes\n" if !$policy; + $numa_object .= ",host-nodes=$hostnodes,policy=$policy"; + } + + push @$cmd, '-object', $numa_object; + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i"; + } + + die "total memory for NUMA nodes must be equal to vm static memory\n" + if $numa_totalmemory && $numa_totalmemory != $static_memory; + + #if no custom tology, we split memory and cores across numa nodes + if(!$numa_totalmemory) { + + my $numa_memory = ($static_memory / $sockets) . "M"; + + for (my $i = 0; $i < $sockets; $i++) { + + my $cpustart = ($cores * $i); + my $cpuend = ($cpustart + $cores - 1) if $cores && $cores > 1; + my $cpus = $cpustart; + $cpus .= "-$cpuend" if $cpuend; + + push @$cmd, '-object', "memory-backend-ram,size=$numa_memory,id=ram-node$i"; + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i"; + } + } + } + + if ($hotplug_features->{memory}) { + foreach_dimm($conf, $vmid, $memory, $sockets, sub { + my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; + push @$cmd, "-object" , "memory-backend-ram,id=mem-$name,size=${dimm_size}M"; + push @$cmd, "-device", "pc-dimm,id=$name,memdev=mem-$name,node=$numanode"; + + #if dimm_memory is not aligned to dimm map + if($current_size > $memory) { + $conf->{memory} = $current_size; + update_config_nolock($vmid, $conf, 1); + } + }); + } + + push @$cmd, '-S' if $conf->{freeze}; + + # set keyboard layout + my $kb = $conf->{keyboard} || $defaults->{keyboard}; + push @$cmd, '-k', $kb if $kb; + + # enable sound + #my $soundhw = $conf->{soundhw} || $defaults->{soundhw}; + #push @$cmd, '-soundhw', 'es1370'; + #push @$cmd, '-soundhw', $soundhw if $soundhw; if($conf->{agent}) { - my $qgasocket = qga_socket($vmid); + my $qgasocket = qmp_socket($vmid, 1); my $pciaddr = print_pci_addr("qga0", $bridges); push @$devices, '-chardev', "socket,path=$qgasocket,server,nowait,id=qga0"; push @$devices, '-device', "virtio-serial,id=qga0$pciaddr"; @@ -2723,9 +3061,11 @@ sub config_to_command { my $pciaddr = print_pci_addr("spice", $bridges); - $spice_port = PVE::Tools::next_spice_port(); + my $nodename = PVE::INotify::nodename(); + my $pfamily = PVE::Tools::get_host_address_family($nodename); + $spice_port = PVE::Tools::next_spice_port($pfamily); - push @$devices, '-spice', "tls-port=${spice_port},addr=127.0.0.1,tls-ciphers=DES-CBC3-SHA,seamless-migration=on"; + push @$devices, '-spice', "tls-port=${spice_port},addr=localhost,tls-ciphers=DES-CBC3-SHA,seamless-migration=on"; push @$devices, '-device', "virtio-serial,id=spice$pciaddr"; push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent"; @@ -2777,12 +3117,29 @@ sub config_to_command { } } + if($drive->{interface} eq 'virtio'){ + push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread}; + } + if ($drive->{interface} eq 'scsi') { - my $maxdev = ($scsihw !~ m/^lsi/) ? 256 : 7; - my $controller = int($drive->{index} / $maxdev); - $pciaddr = print_pci_addr("scsihw$controller", $bridges); - push @$devices, '-device', "$scsihw,id=scsihw$controller$pciaddr" if !$scsicontroller->{$controller}; + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); + + $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges); + my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw; + + my $iothread = ''; + if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){ + $iothread .= ",iothread=iothread-$controller_prefix$controller"; + push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller"; + } + + my $queues = ''; + if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){ + $queues = ",num_queues=$drive->{queues}"; + } + + push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues" if !$scsicontroller->{$controller}; $scsicontroller->{$controller}=1; } @@ -2798,8 +3155,6 @@ sub config_to_command { push @$devices, '-device', print_drivedevice_full($storecfg, $conf, $vmid, $drive, $bridges); }); - push @$cmd, '-m', $conf->{memory} || $defaults->{memory}; - for (my $i = 0; $i < $MAX_NETS; $i++) { next if !$conf->{"net$i"}; my $d = parse_net($conf->{"net$i"}); @@ -2815,31 +3170,25 @@ sub config_to_command { my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i"); push @$devices, '-netdev', $netdevfull; - my $netdevicefull = print_netdevice_full($vmid,$conf,$d,"net$i",$bridges); + my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files); push @$devices, '-device', $netdevicefull; } if (!$q35) { # add pci bridges + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + $bridges->{1} = 1; + $bridges->{2} = 1; + } + + $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/; + while (my ($k, $v) = each %$bridges) { $pciaddr = print_pci_addr("pci.$k"); unshift @$devices, '-device', "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" if $k > 0; } } - # hack: virtio with fairsched is unreliable, so we do not use fairsched - # when the VM uses virtio devices. - if (!$use_virtio && $have_ovz) { - - my $cpuunits = defined($conf->{cpuunits}) ? - $conf->{cpuunits} : $defaults->{cpuunits}; - - push @$cmd, '-cpuunits', $cpuunits if $cpuunits; - - # fixme: cpulimit is currently ignored - #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit}; - } - # add custom args if ($conf->{args}) { my $aa = PVE::Tools::split_args($conf->{args}); @@ -2871,13 +3220,9 @@ sub spice_port { } sub qmp_socket { - my ($vmid) = @_; - return "${var_run_tmpdir}/$vmid.qmp"; -} - -sub qga_socket { - my ($vmid) = @_; - return "${var_run_tmpdir}/$vmid.qga"; + my ($vmid, $qga) = @_; + my $sockettype = $qga ? 'qga' : 'qmp'; + return "${var_run_tmpdir}/$vmid.$sockettype"; } sub pidfile_name { @@ -2889,12 +3234,20 @@ sub vm_devices_list { my ($vmid) = @_; my $res = vm_mon_cmd($vmid, 'query-pci'); - my $devices = {}; foreach my $pcibus (@$res) { foreach my $device (@{$pcibus->{devices}}) { next if !$device->{'qdev_id'}; - $devices->{$device->{'qdev_id'}} = 1; + if ($device->{'pci_bridge'}) { + $devices->{$device->{'qdev_id'}} = 1; + foreach my $bridge_device (@{$device->{'pci_bridge'}->{devices}}) { + next if !$bridge_device->{'qdev_id'}; + $devices->{$bridge_device->{'qdev_id'}} = 1; + $devices->{$device->{'qdev_id'}}++; + } + } else { + $devices->{$device->{'qdev_id'}} = 1; + } } } @@ -2905,114 +3258,152 @@ sub vm_devices_list { } } + my $resmice = vm_mon_cmd($vmid, 'query-mice'); + foreach my $mice (@$resmice) { + if ($mice->{name} eq 'QEMU HID Tablet') { + $devices->{tablet} = 1; + last; + } + } + return $devices; } sub vm_deviceplug { my ($storecfg, $conf, $vmid, $deviceid, $device) = @_; - return 1 if !check_running($vmid); - my $q35 = machine_type_is_q35($conf); + my $devices_list = vm_devices_list($vmid); + return 1 if defined($devices_list->{$deviceid}); + + qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid); # add PCI bridge if we need it for the device + if ($deviceid eq 'tablet') { - qemu_deviceadd($vmid, print_tabletdevice_full($conf)); - return 1; - } - return 1 if !$conf->{hotplug}; + qemu_deviceadd($vmid, print_tabletdevice_full($conf)); - my $devices_list = vm_devices_list($vmid); - return 1 if defined($devices_list->{$deviceid}); + } elsif ($deviceid =~ m/^(virtio)(\d+)$/) { - qemu_bridgeadd($storecfg, $conf, $vmid, $deviceid); #add bridge if we need it for the device + qemu_iothread_add($vmid, $deviceid, $device); - if ($deviceid =~ m/^(virtio)(\d+)$/) { - return undef if !qemu_driveadd($storecfg, $vmid, $device); + qemu_driveadd($storecfg, $vmid, $device); my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); + qemu_deviceadd($vmid, $devicefull); - if(!qemu_deviceaddverify($vmid, $deviceid)) { - qemu_drivedel($vmid, $deviceid); - return undef; + eval { qemu_deviceaddverify($vmid, $deviceid); }; + if (my $err = $@) { + eval { qemu_drivedel($vmid, $deviceid); }; + warn $@ if $@; + die $err; } - } - if ($deviceid =~ m/^(scsihw)(\d+)$/) { + } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) { + + my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi"; my $pciaddr = print_pci_addr($deviceid); - my $devicefull = "$scsihw,id=$deviceid$pciaddr"; + my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw; + + my $devicefull = "$scsihw_type,id=$deviceid$pciaddr"; + + if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) { + qemu_iothread_add($vmid, $deviceid, $device); + $devicefull .= ",iothread=iothread-$deviceid"; + } + + if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) { + $devicefull .= ",num_queues=$device->{queues}"; + } + qemu_deviceadd($vmid, $devicefull); - return undef if(!qemu_deviceaddverify($vmid, $deviceid)); - } + qemu_deviceaddverify($vmid, $deviceid); - if ($deviceid =~ m/^(scsi)(\d+)$/) { - return undef if !qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device); - return undef if !qemu_driveadd($storecfg, $vmid, $device); - my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); - if(!qemu_deviceadd($vmid, $devicefull)) { - qemu_drivedel($vmid, $deviceid); - return undef; + } elsif ($deviceid =~ m/^(scsi)(\d+)$/) { + + qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device); + qemu_driveadd($storecfg, $vmid, $device); + + my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); + eval { qemu_deviceadd($vmid, $devicefull); }; + if (my $err = $@) { + eval { qemu_drivedel($vmid, $deviceid); }; + warn $@ if $@; + die $err; } - } - if ($deviceid =~ m/^(net)(\d+)$/) { + } elsif ($deviceid =~ m/^(net)(\d+)$/) { + return undef if !qemu_netdevadd($vmid, $conf, $device, $deviceid); my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid); qemu_deviceadd($vmid, $netdevicefull); - if(!qemu_deviceaddverify($vmid, $deviceid)) { - qemu_netdevdel($vmid, $deviceid); - return undef; + eval { qemu_deviceaddverify($vmid, $deviceid); }; + if (my $err = $@) { + eval { qemu_netdevdel($vmid, $deviceid); }; + warn $@ if $@; + die $err; } - } + } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) { - if (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) { my $bridgeid = $2; my $pciaddr = print_pci_addr($deviceid); my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr"; + qemu_deviceadd($vmid, $devicefull); - return undef if !qemu_deviceaddverify($vmid, $deviceid); + qemu_deviceaddverify($vmid, $deviceid); + + } else { + die "can't hotplug device '$deviceid'\n"; } return 1; } +# fixme: this should raise exceptions on error! sub vm_deviceunplug { my ($vmid, $conf, $deviceid) = @_; - return 1 if !check_running ($vmid); + my $devices_list = vm_devices_list($vmid); + return 1 if !defined($devices_list->{$deviceid}); + + die "can't unplug bootdisk" if $conf->{bootdisk} && $conf->{bootdisk} eq $deviceid; if ($deviceid eq 'tablet') { + qemu_devicedel($vmid, $deviceid); - return 1; - } - return 1 if !$conf->{hotplug}; + } elsif ($deviceid =~ m/^(virtio)(\d+)$/) { - my $devices_list = vm_devices_list($vmid); - return 1 if !defined($devices_list->{$deviceid}); + qemu_devicedel($vmid, $deviceid); + qemu_devicedelverify($vmid, $deviceid); + qemu_drivedel($vmid, $deviceid); + qemu_iothread_del($conf, $vmid, $deviceid); - die "can't unplug bootdisk" if $conf->{bootdisk} && $conf->{bootdisk} eq $deviceid; + } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) { - if ($deviceid =~ m/^(virtio)(\d+)$/) { - qemu_devicedel($vmid, $deviceid); - return undef if !qemu_devicedelverify($vmid, $deviceid); - return undef if !qemu_drivedel($vmid, $deviceid); - } + qemu_devicedel($vmid, $deviceid); + qemu_devicedelverify($vmid, $deviceid); + qemu_iothread_del($conf, $vmid, $deviceid); - if ($deviceid =~ m/^(lsi)(\d+)$/) { - return undef if !qemu_devicedel($vmid, $deviceid); - } + } elsif ($deviceid =~ m/^(scsi)(\d+)$/) { - if ($deviceid =~ m/^(scsi)(\d+)$/) { - return undef if !qemu_devicedel($vmid, $deviceid); - return undef if !qemu_drivedel($vmid, $deviceid); - } + #qemu 2.3 segfault on drive_del with virtioscsi + iothread + my $device = parse_drive($deviceid, $conf->{$deviceid}); + die "virtioscsi with iothread is not hot-unplugglable currently" if $device->{iothread}; - if ($deviceid =~ m/^(net)(\d+)$/) { qemu_devicedel($vmid, $deviceid); - return undef if !qemu_devicedelverify($vmid, $deviceid); - return undef if !qemu_netdevdel($vmid, $deviceid); + qemu_drivedel($vmid, $deviceid); + qemu_deletescsihw($conf, $vmid, $deviceid); + + } elsif ($deviceid =~ m/^(net)(\d+)$/) { + + qemu_devicedel($vmid, $deviceid); + qemu_devicedelverify($vmid, $deviceid); + qemu_netdevdel($vmid, $deviceid); + + } else { + die "can't unplug device '$deviceid'\n"; } return 1; @@ -3025,26 +3416,60 @@ sub qemu_deviceadd { my %options = split(/[=,]/, $devicefull); vm_mon_cmd($vmid, "device_add" , %options); - return 1; } sub qemu_devicedel { - my($vmid, $deviceid) = @_; + my ($vmid, $deviceid) = @_; + my $ret = vm_mon_cmd($vmid, "device_del", id => $deviceid); +} + +sub qemu_iothread_add { + my($vmid, $deviceid, $device) = @_; + + if ($device->{iothread}) { + my $iothreads = vm_iothreads_list($vmid); + qemu_objectadd($vmid, "iothread-$deviceid", "iothread") if !$iothreads->{"iothread-$deviceid"}; + } +} + +sub qemu_iothread_del { + my($conf, $vmid, $deviceid) = @_; + + my $device = parse_drive($deviceid, $conf->{$deviceid}); + if ($device->{iothread}) { + my $iothreads = vm_iothreads_list($vmid); + qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"}; + } +} + +sub qemu_objectadd { + my($vmid, $objectid, $qomtype) = @_; + + vm_mon_cmd($vmid, "object-add", id => $objectid, "qom-type" => $qomtype); + + return 1; +} + +sub qemu_objectdel { + my($vmid, $objectid) = @_; + + vm_mon_cmd($vmid, "object-del", id => $objectid); + return 1; } sub qemu_driveadd { - my($storecfg, $vmid, $device) = @_; + my ($storecfg, $vmid, $device) = @_; my $drive = print_drive_full($storecfg, $vmid, $device); - my $ret = vm_human_monitor_command($vmid, "drive_add auto $drive"); + $drive =~ s/\\/\\\\/g; + my $ret = vm_human_monitor_command($vmid, "drive_add auto \"$drive\""); + # If the command succeeds qemu prints: "OK" - if ($ret !~ m/OK/s) { - syslog("err", "adding drive failed: $ret"); - return undef; - } - return 1; + return 1 if $ret =~ m/OK/s; + + die "adding drive failed: $ret\n"; } sub qemu_drivedel { @@ -3052,76 +3477,118 @@ sub qemu_drivedel { my $ret = vm_human_monitor_command($vmid, "drive_del drive-$deviceid"); $ret =~ s/^\s+//; - if ($ret =~ m/Device \'.*?\' not found/s) { - # NB: device not found errors mean the drive was auto-deleted and we ignore the error - } - elsif ($ret ne "") { - syslog("err", "deleting drive $deviceid failed : $ret"); - return undef; - } - return 1; + + return 1 if $ret eq ""; + + # NB: device not found errors mean the drive was auto-deleted and we ignore the error + return 1 if $ret =~ m/Device \'.*?\' not found/s; + + die "deleting drive $deviceid failed : $ret\n"; } sub qemu_deviceaddverify { - my ($vmid,$deviceid) = @_; + my ($vmid, $deviceid) = @_; for (my $i = 0; $i <= 5; $i++) { my $devices_list = vm_devices_list($vmid); return 1 if defined($devices_list->{$deviceid}); sleep 1; } - syslog("err", "error on hotplug device $deviceid"); - return undef; + + die "error on hotplug device '$deviceid'\n"; } sub qemu_devicedelverify { - my ($vmid,$deviceid) = @_; + my ($vmid, $deviceid) = @_; + + # need to verify that the device is correctly removed as device_del + # is async and empty return is not reliable - #need to verify the device is correctly remove as device_del is async and empty return is not reliable for (my $i = 0; $i <= 5; $i++) { my $devices_list = vm_devices_list($vmid); return 1 if !defined($devices_list->{$deviceid}); sleep 1; } - syslog("err", "error on hot-unplugging device $deviceid"); - return undef; + + die "error on hot-unplugging device '$deviceid'\n"; } sub qemu_findorcreatescsihw { my ($storecfg, $conf, $vmid, $device) = @_; - my $maxdev = ($conf->{scsihw} && ($conf->{scsihw} !~ m/^lsi/)) ? 256 : 7; - my $controller = int($device->{index} / $maxdev); - my $scsihwid="scsihw$controller"; + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device); + + my $scsihwid="$controller_prefix$controller"; my $devices_list = vm_devices_list($vmid); if(!defined($devices_list->{$scsihwid})) { - return undef if !vm_deviceplug($storecfg, $conf, $vmid, $scsihwid); + vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device); } + return 1; } -sub qemu_bridgeadd { +sub qemu_deletescsihw { + my ($conf, $vmid, $opt) = @_; + + my $device = parse_drive($opt, $conf->{$opt}); + + if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) { + vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}"); + return 1; + } + + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device); + + my $devices_list = vm_devices_list($vmid); + foreach my $opt (keys %{$devices_list}) { + if (PVE::QemuServer::valid_drivename($opt)) { + my $drive = PVE::QemuServer::parse_drive($opt, $conf->{$opt}); + if($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) { + return 1; + } + } + } + + my $scsihwid="scsihw$controller"; + + vm_deviceunplug($vmid, $conf, $scsihwid); + + return 1; +} + +sub qemu_add_pci_bridge { my ($storecfg, $conf, $vmid, $device) = @_; my $bridges = {}; - my $bridgeid = undef; + + my $bridgeid; + print_pci_addr($device, $bridges); while (my ($k, $v) = each %$bridges) { $bridgeid = $k; } - return if !$bridgeid || $bridgeid < 1; + return 1 if !defined($bridgeid) || $bridgeid < 1; + my $bridge = "pci.$bridgeid"; my $devices_list = vm_devices_list($vmid); - if(!defined($devices_list->{$bridge})) { - return undef if !vm_deviceplug($storecfg, $conf, $vmid, $bridge); + if (!defined($devices_list->{$bridge})) { + vm_deviceplug($storecfg, $conf, $vmid, $bridge); } + return 1; } +sub qemu_set_link_status { + my ($vmid, $device, $up) = @_; + + vm_mon_cmd($vmid, "set_link", name => $device, + up => $up ? JSON::true : JSON::false); +} + sub qemu_netdevadd { my ($vmid, $conf, $device, $deviceid) = @_; @@ -3136,32 +3603,121 @@ sub qemu_netdevdel { my ($vmid, $deviceid) = @_; vm_mon_cmd($vmid, "netdev_del", id => $deviceid); - return 1; } sub qemu_cpu_hotplug { - my ($vmid, $conf, $cores) = @_; + my ($vmid, $conf, $vcpus) = @_; + + my $sockets = 1; + $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused + $sockets = $conf->{sockets} if $conf->{sockets}; + my $cores = $conf->{cores} || 1; + my $maxcpus = $sockets * $cores; + + $vcpus = $maxcpus if !$vcpus; - die "new cores config is not defined" if !$cores; - die "you can't add more cores than maxcpus" - if $conf->{maxcpus} && ($cores > $conf->{maxcpus}); - return if !check_running($vmid); + die "you can't add more vcpus than maxcpus\n" + if $vcpus > $maxcpus; - my $currentcores = $conf->{cores} if $conf->{cores}; - die "current cores is not defined" if !$currentcores; - die "maxcpus is not defined" if !$conf->{maxcpus}; - raise_param_exc({ 'cores' => "online cpu unplug is not yet possible" }) - if($cores < $currentcores); + my $currentvcpus = $conf->{vcpus} || $maxcpus; + die "online cpu unplug is not yet possible\n" + if $vcpus < $currentvcpus; - my $currentrunningcores = vm_mon_cmd($vmid, "query-cpus"); - raise_param_exc({ 'cores' => "cores number if running vm is different than configuration" }) - if scalar (@{$currentrunningcores}) != $currentcores; + my $currentrunningvcpus = vm_mon_cmd($vmid, "query-cpus"); + die "vcpus in running vm is different than configuration\n" + if scalar(@{$currentrunningvcpus}) != $currentvcpus; - for(my $i = $currentcores; $i < $cores; $i++) { + for (my $i = $currentvcpus; $i < $vcpus; $i++) { vm_mon_cmd($vmid, "cpu-add", id => int($i)); } } +sub qemu_memory_hotplug { + my ($vmid, $conf, $defaults, $opt, $value) = @_; + + return $value if !check_running($vmid); + + my $memory = $conf->{memory} || $defaults->{memory}; + $value = $defaults->{memory} if !$value; + return $value if $value == $memory; + + my $static_memory = $STATICMEM; + my $dimm_memory = $memory - $static_memory; + + die "memory can't be lower than $static_memory MB" if $value < $static_memory; + die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM; + + + my $sockets = 1; + $sockets = $conf->{sockets} if $conf->{sockets}; + + if($value > $memory) { + + foreach_dimm($conf, $vmid, $value, $sockets, sub { + my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; + + return if $current_size <= $conf->{memory}; + + eval { vm_mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-ram", id => "mem-$name", props => { size => int($dimm_size*1024*1024) } ) }; + if (my $err = $@) { + eval { qemu_objectdel($vmid, "mem-$name"); }; + die $err; + } + + eval { vm_mon_cmd($vmid, "device_add", driver => "pc-dimm", id => "$name", memdev => "mem-$name", node => $numanode) }; + if (my $err = $@) { + eval { qemu_objectdel($vmid, "mem-$name"); }; + die $err; + } + #update conf after each succesful module hotplug + $conf->{memory} = $current_size; + update_config_nolock($vmid, $conf, 1); + }); + + } else { + + foreach_reverse_dimm($conf, $vmid, $value, $sockets, sub { + my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; + + return if $current_size >= $conf->{memory}; + print "try to unplug memory dimm $name\n"; + + my $retry = 0; + while (1) { + eval { qemu_devicedel($vmid, $name) }; + sleep 3; + my $dimm_list = qemu_dimm_list($vmid); + last if !$dimm_list->{$name}; + raise_param_exc({ $name => "error unplug memory module" }) if $retry > 5; + $retry++; + } + + #update conf after each succesful module unplug + $conf->{memory} = $current_size; + + eval { qemu_objectdel($vmid, "mem-$name"); }; + update_config_nolock($vmid, $conf, 1); + }); + } +} + +sub qemu_dimm_list { + my ($vmid) = @_; + + my $dimmarray = vm_mon_cmd_nocheck($vmid, "query-memory-devices"); + my $dimms = {}; + + foreach my $dimm (@$dimmarray) { + + $dimms->{$dimm->{data}->{id}}->{id} = $dimm->{data}->{id}; + $dimms->{$dimm->{data}->{id}}->{node} = $dimm->{data}->{node}; + $dimms->{$dimm->{data}->{id}}->{addr} = $dimm->{data}->{addr}; + $dimms->{$dimm->{data}->{id}}->{size} = $dimm->{data}->{size}; + $dimms->{$dimm->{data}->{id}}->{slot} = $dimm->{data}->{slot}; + } + return $dimms; +} + sub qemu_block_set_io_throttle { my ($vmid, $deviceid, $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr) = @_; @@ -3249,112 +3805,494 @@ sub vm_monitor_command { die "monitor write error - $!"; } - return if ($cmdstr eq 'q') || ($cmdstr eq 'quit'); + return if ($cmdstr eq 'q') || ($cmdstr eq 'quit'); + + $timeout = 20; + + if ($cmdstr =~ m/^(info\s+migrate|migrate\s)/) { + $timeout = 60*60; # 1 hour + } elsif ($cmdstr =~ m/^(eject|change)/) { + $timeout = 60; # note: cdrom mount command is slow + } + if ($res = __read_avail($sock, $timeout)) { + + my @lines = split("\r?\n", $res); + + shift @lines if $lines[0] !~ m/^unknown command/; # skip echo + + $res = join("\n", @lines); + $res .= "\n"; + } + }; + + my $err = $@; + + if ($err) { + syslog("err", "VM $vmid monitor command failed - $err"); + die $err; + } + + return $res; +} + +sub qemu_block_resize { + my ($vmid, $deviceid, $storecfg, $volid, $size) = @_; + + my $running = check_running($vmid); + + return if !PVE::Storage::volume_resize($storecfg, $volid, $size, $running); + + return if !$running; + + vm_mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size)); + +} + +sub qemu_volume_snapshot { + my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_; + + my $running = check_running($vmid); + + if ($running && do_snapshots_with_qemu($storecfg, $volid)){ + vm_mon_cmd($vmid, "snapshot-drive", device => $deviceid, name => $snap); + } else { + PVE::Storage::volume_snapshot($storecfg, $volid, $snap); + } +} + +sub qemu_volume_snapshot_delete { + my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_; + + my $running = check_running($vmid); + + return if !PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running); + + return if !$running; + + vm_mon_cmd($vmid, "delete-drive-snapshot", device => $deviceid, name => $snap); +} + +sub set_migration_caps { + my ($vmid) = @_; + + my $cap_ref = []; + + my $enabled_cap = { + "auto-converge" => 1, + "xbzrle" => 1, + "x-rdma-pin-all" => 0, + "zero-blocks" => 0, + "compress" => 0 + }; + + my $supported_capabilities = vm_mon_cmd_nocheck($vmid, "query-migrate-capabilities"); + + for my $supported_capability (@$supported_capabilities) { + push @$cap_ref, { + capability => $supported_capability->{capability}, + state => $enabled_cap->{$supported_capability->{capability}} ? JSON::true : JSON::false, + }; + } + + vm_mon_cmd_nocheck($vmid, "migrate-set-capabilities", capabilities => $cap_ref); +} + +my $fast_plug_option = { + 'lock' => 1, + 'name' => 1, + 'onboot' => 1, + 'shares' => 1, + 'startup' => 1, + 'description' => 1, +}; + +# hotplug changes in [PENDING] +# $selection hash can be used to only apply specified options, for +# example: { cores => 1 } (only apply changed 'cores') +# $errors ref is used to return error messages +sub vmconfig_hotplug_pending { + my ($vmid, $conf, $storecfg, $selection, $errors) = @_; + + my $defaults = load_defaults(); + + # commit values which do not have any impact on running VM first + # Note: those option cannot raise errors, we we do not care about + # $selection and always apply them. + + my $add_error = sub { + my ($opt, $msg) = @_; + $errors->{$opt} = "hotplug problem - $msg"; + }; + + my $changes = 0; + foreach my $opt (keys %{$conf->{pending}}) { # add/change + if ($fast_plug_option->{$opt}) { + $conf->{$opt} = $conf->{pending}->{$opt}; + delete $conf->{pending}->{$opt}; + $changes = 1; + } + } + + if ($changes) { + update_config_nolock($vmid, $conf, 1); + $conf = load_config($vmid); # update/reload + } + + my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); + + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + while (my ($opt, $force) = each %$pending_delete_hash) { + next if $selection && !$selection->{$opt}; + eval { + if ($opt eq 'hotplug') { + die "skip\n" if ($conf->{hotplug} =~ /memory/); + } elsif ($opt eq 'tablet') { + die "skip\n" if !$hotplug_features->{usb}; + if ($defaults->{tablet}) { + vm_deviceplug($storecfg, $conf, $vmid, $opt); + } else { + vm_deviceunplug($vmid, $conf, $opt); + } + } elsif ($opt eq 'vcpus') { + die "skip\n" if !$hotplug_features->{cpu}; + qemu_cpu_hotplug($vmid, $conf, undef); + } elsif ($opt eq 'balloon') { + # enable balloon device is not hotpluggable + die "skip\n" if !defined($conf->{balloon}) || $conf->{balloon}; + } elsif ($fast_plug_option->{$opt}) { + # do nothing + } elsif ($opt =~ m/^net(\d+)$/) { + die "skip\n" if !$hotplug_features->{network}; + vm_deviceunplug($vmid, $conf, $opt); + } elsif (valid_drivename($opt)) { + die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/; + vm_deviceunplug($vmid, $conf, $opt); + vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); + } elsif ($opt =~ m/^memory$/) { + die "skip\n" if !$hotplug_features->{memory}; + qemu_memory_hotplug($vmid, $conf, $defaults, $opt); + } elsif ($opt eq 'cpuunits') { + cgroups_write("cpu", $vmid, "cpu.shares", $defaults->{cpuunits}); + } elsif ($opt eq 'cpulimit') { + cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", -1); + } else { + die "skip\n"; + } + }; + if (my $err = $@) { + &$add_error($opt, $err) if $err ne "skip\n"; + } else { + # save new config if hotplug was successful + delete $conf->{$opt}; + vmconfig_undelete_pending_option($conf, $opt); + update_config_nolock($vmid, $conf, 1); + $conf = load_config($vmid); # update/reload + } + } + + foreach my $opt (keys %{$conf->{pending}}) { + next if $selection && !$selection->{$opt}; + my $value = $conf->{pending}->{$opt}; + eval { + if ($opt eq 'hotplug') { + die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/); + } elsif ($opt eq 'tablet') { + die "skip\n" if !$hotplug_features->{usb}; + if ($value == 1) { + vm_deviceplug($storecfg, $conf, $vmid, $opt); + } elsif ($value == 0) { + vm_deviceunplug($vmid, $conf, $opt); + } + } elsif ($opt eq 'vcpus') { + die "skip\n" if !$hotplug_features->{cpu}; + qemu_cpu_hotplug($vmid, $conf, $value); + } elsif ($opt eq 'balloon') { + # enable/disable balloning device is not hotpluggable + my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon}); + my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon}); + die "skip\n" if $old_balloon_enabled != $new_balloon_enabled; + + # allow manual ballooning if shares is set to zero + if ((defined($conf->{shares}) && ($conf->{shares} == 0))) { + my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory}; + vm_mon_cmd($vmid, "balloon", value => $balloon*1024*1024); + } + } elsif ($opt =~ m/^net(\d+)$/) { + # some changes can be done without hotplug + vmconfig_update_net($storecfg, $conf, $hotplug_features->{network}, + $vmid, $opt, $value); + } elsif (valid_drivename($opt)) { + # some changes can be done without hotplug + vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk}, + $vmid, $opt, $value, 1); + } elsif ($opt =~ m/^memory$/) { #dimms + die "skip\n" if !$hotplug_features->{memory}; + $value = qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value); + } elsif ($opt eq 'cpuunits') { + cgroups_write("cpu", $vmid, "cpu.shares", $conf->{pending}->{$opt}); + } elsif ($opt eq 'cpulimit') { + my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000); + cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", $cpulimit); + } else { + die "skip\n"; # skip non-hot-pluggable options + } + }; + if (my $err = $@) { + &$add_error($opt, $err) if $err ne "skip\n"; + } else { + # save new config if hotplug was successful + $conf->{$opt} = $value; + delete $conf->{pending}->{$opt}; + update_config_nolock($vmid, $conf, 1); + $conf = load_config($vmid); # update/reload + } + } +} + +sub try_deallocate_drive { + my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_; + + if (($force || $key =~ /^unused/) && !drive_is_cdrom($drive, 1)) { + my $volid = $drive->{file}; + if (vm_is_volid_owner($storecfg, $vmid, $volid)) { + my $sid = PVE::Storage::parse_volume_id($volid); + $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']); + + # check if the disk is really unused + my $used_paths = PVE::QemuServer::get_used_paths($vmid, $storecfg, $conf, 1, $key); + my $path = PVE::Storage::path($storecfg, $volid); + die "unable to delete '$volid' - volume is still in use (snapshot?)\n" + if $used_paths->{$path}; + PVE::Storage::vdisk_free($storecfg, $volid); + return 1; + } else { + # If vm is not owner of this disk remove from config + return 1; + } + } + + return undef; +} + +sub vmconfig_delete_or_detach_drive { + my ($vmid, $storecfg, $conf, $opt, $force) = @_; + + my $drive = parse_drive($opt, $conf->{$opt}); + + my $rpcenv = PVE::RPCEnvironment::get(); + my $authuser = $rpcenv->get_user(); + + if ($force) { + $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']); + try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force); + } else { + vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive); + } +} + +sub vmconfig_apply_pending { + my ($vmid, $conf, $storecfg) = @_; + + # cold plug + + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + while (my ($opt, $force) = each %$pending_delete_hash) { + die "internal error" if $opt =~ m/^unused/; + $conf = load_config($vmid); # update/reload + if (!defined($conf->{$opt})) { + vmconfig_undelete_pending_option($conf, $opt); + update_config_nolock($vmid, $conf, 1); + } elsif (valid_drivename($opt)) { + vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); + vmconfig_undelete_pending_option($conf, $opt); + delete $conf->{$opt}; + update_config_nolock($vmid, $conf, 1); + } else { + vmconfig_undelete_pending_option($conf, $opt); + delete $conf->{$opt}; + update_config_nolock($vmid, $conf, 1); + } + } + + $conf = load_config($vmid); # update/reload - $timeout = 20; + foreach my $opt (keys %{$conf->{pending}}) { # add/change + $conf = load_config($vmid); # update/reload - if ($cmdstr =~ m/^(info\s+migrate|migrate\s)/) { - $timeout = 60*60; # 1 hour - } elsif ($cmdstr =~ m/^(eject|change)/) { - $timeout = 60; # note: cdrom mount command is slow + if (defined($conf->{$opt}) && ($conf->{$opt} eq $conf->{pending}->{$opt})) { + # skip if nothing changed + } elsif (valid_drivename($opt)) { + vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt})) + if defined($conf->{$opt}); + $conf->{$opt} = $conf->{pending}->{$opt}; + } else { + $conf->{$opt} = $conf->{pending}->{$opt}; } - if ($res = __read_avail($sock, $timeout)) { - my @lines = split("\r?\n", $res); + delete $conf->{pending}->{$opt}; + update_config_nolock($vmid, $conf, 1); + } +} - shift @lines if $lines[0] !~ m/^unknown command/; # skip echo +my $safe_num_ne = sub { + my ($a, $b) = @_; - $res = join("\n", @lines); - $res .= "\n"; - } - }; + return 0 if !defined($a) && !defined($b); + return 1 if !defined($a); + return 1 if !defined($b); - my $err = $@; + return $a != $b; +}; - if ($err) { - syslog("err", "VM $vmid monitor command failed - $err"); - die $err; - } +my $safe_string_ne = sub { + my ($a, $b) = @_; - return $res; -} + return 0 if !defined($a) && !defined($b); + return 1 if !defined($a); + return 1 if !defined($b); -sub qemu_block_resize { - my ($vmid, $deviceid, $storecfg, $volid, $size) = @_; + return $a ne $b; +}; - my $running = check_running($vmid); +sub vmconfig_update_net { + my ($storecfg, $conf, $hotplug, $vmid, $opt, $value) = @_; - return if !PVE::Storage::volume_resize($storecfg, $volid, $size, $running); + my $newnet = parse_net($value); - return if !$running; + if ($conf->{$opt}) { + my $oldnet = parse_net($conf->{$opt}); - vm_mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size)); + if (&$safe_string_ne($oldnet->{model}, $newnet->{model}) || + &$safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) || + &$safe_num_ne($oldnet->{queues}, $newnet->{queues}) || + !($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change -} + # for non online change, we try to hot-unplug + die "skip\n" if !$hotplug; + vm_deviceunplug($vmid, $conf, $opt); + } else { -sub qemu_volume_snapshot { - my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_; + die "internal error" if $opt !~ m/net(\d+)/; + my $iface = "tap${vmid}i$1"; - my $running = check_running($vmid); + if (&$safe_num_ne($oldnet->{rate}, $newnet->{rate})) { + PVE::Network::tap_rate_limit($iface, $newnet->{rate}); + } - return if !PVE::Storage::volume_snapshot($storecfg, $volid, $snap, $running); + if (&$safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) || + &$safe_num_ne($oldnet->{tag}, $newnet->{tag}) || + &$safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) { + PVE::Network::tap_unplug($iface); + PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}); + } - return if !$running; + if (&$safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) { + qemu_set_link_status($vmid, $opt, !$newnet->{link_down}); + } - vm_mon_cmd($vmid, "snapshot-drive", device => $deviceid, name => $snap); + return 1; + } + } + if ($hotplug) { + vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet); + } else { + die "skip\n"; + } } -sub qemu_volume_snapshot_delete { - my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_; +sub vmconfig_update_disk { + my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $force) = @_; - my $running = check_running($vmid); + # fixme: do we need force? - return if !PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running); + my $drive = parse_drive($opt, $value); - return if !$running; + if ($conf->{$opt}) { - vm_mon_cmd($vmid, "delete-drive-snapshot", device => $deviceid, name => $snap); -} + if (my $old_drive = parse_drive($opt, $conf->{$opt})) { -sub qga_freezefs { - my ($vmid) = @_; + my $media = $drive->{media} || 'disk'; + my $oldmedia = $old_drive->{media} || 'disk'; + die "unable to change media type\n" if $media ne $oldmedia; - #need to impplement call to qemu-ga -} + if (!drive_is_cdrom($old_drive)) { -sub qga_unfreezefs { - my ($vmid) = @_; + if ($drive->{file} ne $old_drive->{file}) { - #need to impplement call to qemu-ga -} + die "skip\n" if !$hotplug; -sub set_migration_caps { - my ($vmid) = @_; + # unplug and register as unused + vm_deviceunplug($vmid, $conf, $opt); + vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive) - my $cap_ref = []; + } else { + # update existing disk + + # skip non hotpluggable value + if (&$safe_num_ne($drive->{discard}, $old_drive->{discard}) || + &$safe_string_ne($drive->{iothread}, $old_drive->{iothread}) || + &$safe_string_ne($drive->{queues}, $old_drive->{queues}) || + &$safe_string_ne($drive->{cache}, $old_drive->{cache})) { + die "skip\n"; + } - my $enabled_cap = { - "auto-converge" => 1, - "xbzrle" => 0, - "x-rdma-pin-all" => 0, - "zero-blocks" => 0, - }; + # apply throttle + if (&$safe_num_ne($drive->{mbps}, $old_drive->{mbps}) || + &$safe_num_ne($drive->{mbps_rd}, $old_drive->{mbps_rd}) || + &$safe_num_ne($drive->{mbps_wr}, $old_drive->{mbps_wr}) || + &$safe_num_ne($drive->{iops}, $old_drive->{iops}) || + &$safe_num_ne($drive->{iops_rd}, $old_drive->{iops_rd}) || + &$safe_num_ne($drive->{iops_wr}, $old_drive->{iops_wr}) || + &$safe_num_ne($drive->{mbps_max}, $old_drive->{mbps_max}) || + &$safe_num_ne($drive->{mbps_rd_max}, $old_drive->{mbps_rd_max}) || + &$safe_num_ne($drive->{mbps_wr_max}, $old_drive->{mbps_wr_max}) || + &$safe_num_ne($drive->{iops_max}, $old_drive->{iops_max}) || + &$safe_num_ne($drive->{iops_rd_max}, $old_drive->{iops_rd_max}) || + &$safe_num_ne($drive->{iops_wr_max}, $old_drive->{iops_wr_max})) { + + qemu_block_set_io_throttle($vmid,"drive-$opt", + ($drive->{mbps} || 0)*1024*1024, + ($drive->{mbps_rd} || 0)*1024*1024, + ($drive->{mbps_wr} || 0)*1024*1024, + $drive->{iops} || 0, + $drive->{iops_rd} || 0, + $drive->{iops_wr} || 0, + ($drive->{mbps_max} || 0)*1024*1024, + ($drive->{mbps_rd_max} || 0)*1024*1024, + ($drive->{mbps_wr_max} || 0)*1024*1024, + $drive->{iops_max} || 0, + $drive->{iops_rd_max} || 0, + $drive->{iops_wr_max} || 0); - my $supported_capabilities = vm_mon_cmd_nocheck($vmid, "query-migrate-capabilities"); + } - for my $supported_capability (@$supported_capabilities) { - push @$cap_ref, { - capability => $supported_capability->{capability}, - state => $enabled_cap->{$supported_capability->{capability}} ? JSON::true : JSON::false, - }; + return 1; + } + + } else { # cdrom + + if ($drive->{file} eq 'none') { + vm_mon_cmd($vmid, "eject",force => JSON::true,device => "drive-$opt"); + } else { + my $path = get_iso_path($storecfg, $vmid, $drive->{file}); + vm_mon_cmd($vmid, "eject", force => JSON::true,device => "drive-$opt"); # force eject if locked + vm_mon_cmd($vmid, "change", device => "drive-$opt",target => "$path") if $path; + } + + return 1; + } + } } - vm_mon_cmd_nocheck($vmid, "migrate-set-capabilities", capabilities => $cap_ref); + die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/; + # hotplug new disks + vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive); } sub vm_start { - my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, $forcemachine, $spice_ticket) = @_; + my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, + $forcemachine, $spice_ticket) = @_; lock_config($vmid, sub { my $conf = load_config($vmid, $migratedfrom); @@ -3365,12 +4303,25 @@ sub vm_start { die "VM $vmid already running\n" if check_running($vmid, undef, $migratedfrom); + if (!$statefile && scalar(keys %{$conf->{pending}})) { + vmconfig_apply_pending($vmid, $conf, $storecfg); + $conf = load_config($vmid); # update/reload + } + my $defaults = load_defaults(); # set environment variable useful inside network script $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom; - my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine); + # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we + # load new efi bios files on migration + my $use_old_bios_files; + if ($migratedfrom && $forcemachine && ($forcemachine =~ m/pc-(i440fx|q35)-(\d+)\.(\d+)/)) { + my ($major, $minor) = ($2, $3); + $use_old_bios_files = 1 if ($major <= 2) && ($minor < 4); + } + + my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $use_old_bios_files); my $migrate_port = 0; my $migrate_uri; @@ -3378,12 +4329,13 @@ sub vm_start { if ($statefile eq 'tcp') { my $localip = "localhost"; my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg'); + my $nodename = PVE::INotify::nodename(); if ($datacenterconf->{migration_unsecure}) { - my $nodename = PVE::INotify::nodename(); $localip = PVE::Cluster::remote_node_ip($nodename, 1); } - $migrate_port = PVE::Tools::next_migrate_port(); - $migrate_uri = "tcp:${localip}:${migrate_port}"; + my $pfamily = PVE::Tools::get_host_address_family($nodename); + $migrate_port = PVE::Tools::next_migrate_port($pfamily); + $migrate_uri = "tcp:[${localip}]:${migrate_port}"; push @$cmd, '-incoming', $migrate_uri; push @$cmd, '-S'; } else { @@ -3404,13 +4356,7 @@ sub vm_start { my $info = pci_device_info("0000:$pciid"); die "IOMMU not present\n" if !check_iommu_support(); die "no pci device info for device '$pciid'\n" if !$info; - - if ($d->{driver} && $d->{driver} eq "vfio") { - die "can't unbind/bind pci group to vfio '$pciid'\n" if !pci_dev_group_bind_to_vfio($pciid); - } else { - die "can't unbind/bind to stub pci device '$pciid'\n" if !pci_dev_bind_to_stub($info); - } - + die "can't unbind/bind pci group to vfio '$pciid'\n" if !pci_dev_group_bind_to_vfio($pciid); die "can't reset pci device '$pciid'\n" if $info->{has_fl_reset} and !pci_dev_reset($info); } } @@ -3432,15 +4378,15 @@ sub vm_start { if ($migratedfrom) { eval { - PVE::QemuServer::set_migration_caps($vmid); + set_migration_caps($vmid); }; warn $@ if $@; if ($spice_port) { print "spice listens on port $spice_port\n"; if ($spice_ticket) { - PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "set_password", protocol => 'spice', password => $spice_ticket); - PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "expire_password", protocol => 'spice', time => "+30"); + vm_mon_cmd_nocheck($vmid, "set_password", protocol => 'spice', password => $spice_ticket); + vm_mon_cmd_nocheck($vmid, "expire_password", protocol => 'spice', time => "+30"); } } @@ -3449,12 +4395,20 @@ sub vm_start { if (!$statefile && (!defined($conf->{balloon}) || $conf->{balloon})) { vm_mon_cmd_nocheck($vmid, "balloon", value => $conf->{balloon}*1024*1024) if $conf->{balloon}; - vm_mon_cmd_nocheck($vmid, 'qom-set', - path => "machine/peripheral/balloon0", - property => "guest-stats-polling-interval", - value => 2); + } + + foreach my $opt (keys %$conf) { + next if $opt !~ m/^net\d+$/; + my $nicconf = parse_net($conf->{$opt}); + qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down}; } } + + vm_mon_cmd_nocheck($vmid, 'qom-set', + path => "machine/peripheral/balloon0", + property => "guest-stats-polling-interval", + value => 2) if (!defined($conf->{balloon}) || $conf->{balloon}); + }); } @@ -3486,7 +4440,7 @@ sub vm_qmp_command { eval { die "VM $vmid not running\n" if !check_running($vmid, $nocheck); my $sname = qmp_socket($vmid); - if (-e $sname) { + if (-e $sname) { # test if VM is reasonambe new and supports qmp/qga my $qmpclient = PVE::QMPClient->new(); $res = $qmpclient->cmd($vmid, $cmd, $timeout); @@ -3563,10 +4517,9 @@ sub get_vm_volumes { } sub vm_stop_cleanup { - my ($storecfg, $vmid, $conf, $keepActive) = @_; + my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_; eval { - fairsched_rmnod($vmid); # try to destroy group if (!$keepActive) { my $vollist = get_vm_volumes($conf); @@ -3576,6 +4529,8 @@ sub vm_stop_cleanup { foreach my $ext (qw(mon qmp pid vnc qga)) { unlink "/var/run/qemu-server/${vmid}.$ext"; } + + vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes; }; warn $@ if $@; # avoid errors - just warn } @@ -3592,7 +4547,7 @@ sub vm_stop { my $pid = check_running($vmid, $nocheck, $migratedfrom); kill 15, $pid if $pid; my $conf = load_config($vmid, $migratedfrom); - vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive); + vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0); return; } @@ -3606,7 +4561,7 @@ sub vm_stop { $conf = load_config($vmid); check_lock($conf) if !$skiplock; if (!defined($timeout) && $shutdown && $conf->{startup}) { - my $opts = parse_startup($conf->{startup}); + my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup}); $timeout = $opts->{down} if $opts->{down}; } } @@ -3615,10 +4570,13 @@ sub vm_stop { eval { if ($shutdown) { - $nocheck ? vm_mon_cmd_nocheck($vmid, "system_powerdown") : vm_mon_cmd($vmid, "system_powerdown"); - + if (defined($conf) && $conf->{agent}) { + vm_qmp_command($vmid, { execute => "guest-shutdown" }, $nocheck); + } else { + vm_qmp_command($vmid, { execute => "system_powerdown" }, $nocheck); + } } else { - $nocheck ? vm_mon_cmd_nocheck($vmid, "quit") : vm_mon_cmd($vmid, "quit"); + vm_qmp_command($vmid, { execute => "quit" }, $nocheck); } }; my $err = $@; @@ -3638,7 +4596,7 @@ sub vm_stop { die "VM quit/powerdown failed - got timeout\n"; } } else { - vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive) if $conf; + vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf; return; } } else { @@ -3665,7 +4623,7 @@ sub vm_stop { sleep 1; } - vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive) if $conf; + vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf; }); } @@ -3683,15 +4641,21 @@ sub vm_suspend { } sub vm_resume { - my ($vmid, $skiplock) = @_; + my ($vmid, $skiplock, $nocheck) = @_; lock_config($vmid, sub { - my $conf = load_config($vmid); + if (!$nocheck) { - check_lock($conf) if !($skiplock || ($conf->{lock} && $conf->{lock} eq 'backup')); + my $conf = load_config($vmid); - vm_mon_cmd($vmid, "cont"); + check_lock($conf) if !($skiplock || ($conf->{lock} && $conf->{lock} eq 'backup')); + + vm_mon_cmd($vmid, "cont"); + + } else { + vm_mon_cmd_nocheck($vmid, "cont"); + } }); } @@ -3717,7 +4681,6 @@ sub vm_destroy { check_lock($conf) if !$skiplock; if (!check_running($vmid)) { - fairsched_rmnod($vmid); # try to destroy group destroy_vm($storecfg, $vmid); } else { die "VM $vmid is running - destroy failed\n"; @@ -3782,30 +4745,6 @@ sub pci_dev_reset { return file_write($fn, "1"); } -sub pci_dev_bind_to_stub { - my ($dev) = @_; - - my $name = $dev->{name}; - - my $testdir = "$pcisysfs/drivers/pci-stub/$name"; - return 1 if -d $testdir; - - my $data = "$dev->{vendor} $dev->{product}"; - return undef if !file_write("$pcisysfs/drivers/pci-stub/new_id", $data); - - my $fn = "$pcisysfs/devices/$name/driver/unbind"; - if (!file_write($fn, $name)) { - return undef if -f $fn; - } - - $fn = "$pcisysfs/drivers/pci-stub/bind"; - if (! -d $testdir) { - return undef if !file_write($fn, $name); - } - - return -d $testdir; -} - sub pci_dev_bind_to_vfio { my ($dev) = @_; @@ -3854,6 +4793,11 @@ sub pci_dev_group_bind_to_vfio { foreach my $pciid (@devs) { $pciid =~ m/^([:\.\da-f]+)$/ or die "PCI ID $pciid not valid!\n"; + + # pci bridges, switches or root ports are not supported + # they have a pci_bus subdirectory so skip them + next if (-e "$pcisysfs/devices/$pciid/pci_bus"); + my $info = pci_device_info($1); pci_dev_bind_to_vfio($info) || die "Cannot bind $pciid to vfio\n"; } @@ -3871,6 +4815,7 @@ sub print_pci_addr { balloon0 => { bus => 0, addr => 3 }, watchdog => { bus => 0, addr => 4 }, scsihw0 => { bus => 0, addr => 5 }, + 'pci.3' => { bus => 0, addr => 5 }, #can also be used for virtio-scsi-single bridge scsihw1 => { bus => 0, addr => 6 }, ahci0 => { bus => 0, addr => 7 }, qga0 => { bus => 0, addr => 8 }, @@ -3933,6 +4878,38 @@ sub print_pci_addr { 'virtio13' => { bus => 2, addr => 8 }, 'virtio14' => { bus => 2, addr => 9 }, 'virtio15' => { bus => 2, addr => 10 }, + 'virtioscsi0' => { bus => 3, addr => 1 }, + 'virtioscsi1' => { bus => 3, addr => 2 }, + 'virtioscsi2' => { bus => 3, addr => 3 }, + 'virtioscsi3' => { bus => 3, addr => 4 }, + 'virtioscsi4' => { bus => 3, addr => 5 }, + 'virtioscsi5' => { bus => 3, addr => 6 }, + 'virtioscsi6' => { bus => 3, addr => 7 }, + 'virtioscsi7' => { bus => 3, addr => 8 }, + 'virtioscsi8' => { bus => 3, addr => 9 }, + 'virtioscsi9' => { bus => 3, addr => 10 }, + 'virtioscsi10' => { bus => 3, addr => 11 }, + 'virtioscsi11' => { bus => 3, addr => 12 }, + 'virtioscsi12' => { bus => 3, addr => 13 }, + 'virtioscsi13' => { bus => 3, addr => 14 }, + 'virtioscsi14' => { bus => 3, addr => 15 }, + 'virtioscsi15' => { bus => 3, addr => 16 }, + 'virtioscsi16' => { bus => 3, addr => 17 }, + 'virtioscsi17' => { bus => 3, addr => 18 }, + 'virtioscsi18' => { bus => 3, addr => 19 }, + 'virtioscsi19' => { bus => 3, addr => 20 }, + 'virtioscsi20' => { bus => 3, addr => 21 }, + 'virtioscsi21' => { bus => 3, addr => 22 }, + 'virtioscsi22' => { bus => 3, addr => 23 }, + 'virtioscsi23' => { bus => 3, addr => 24 }, + 'virtioscsi24' => { bus => 3, addr => 25 }, + 'virtioscsi25' => { bus => 3, addr => 26 }, + 'virtioscsi26' => { bus => 3, addr => 27 }, + 'virtioscsi27' => { bus => 3, addr => 28 }, + 'virtioscsi28' => { bus => 3, addr => 29 }, + 'virtioscsi29' => { bus => 3, addr => 30 }, + 'virtioscsi30' => { bus => 3, addr => 31 }, + }; if (defined($devices->{$id}->{bus}) && defined($devices->{$id}->{addr})) { @@ -4393,6 +5370,8 @@ sub restore_vma_archive { $d->{volid} = $volid; my $path = PVE::Storage::path($cfg, $volid); + PVE::Storage::activate_volumes($cfg,[$volid]); + my $write_zeros = 1; # fixme: what other storages types initialize volumes with zero? if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs' || $scfg->{type} eq 'glusterfs' || @@ -4458,13 +5437,21 @@ sub restore_vma_archive { alarm($oldtimeout) if $oldtimeout; + my $vollist = []; + foreach my $devname (keys %$devinfo) { + my $volid = $devinfo->{$devname}->{volid}; + push @$vollist, $volid if $volid; + } + + my $cfg = cfs_read_file('storage.cfg'); + PVE::Storage::deactivate_volumes($cfg, $vollist); + unlink $mapfifo; if ($err) { rmtree $tmpdir; unlink $tmpfn; - my $cfg = cfs_read_file('storage.cfg'); foreach my $devname (keys %$devinfo) { my $volid = $devinfo->{$devname}->{volid}; next if !$volid; @@ -4504,7 +5491,7 @@ sub restore_tar_archive { my $storecfg = cfs_read_file('storage.cfg'); # destroy existing data - keep empty config - my $vmcfgfn = PVE::QemuServer::config_file($vmid); + my $vmcfgfn = config_file($vmid); destroy_vm($storecfg, $vmid, 1) if -f $vmcfgfn; my $tocmd = "/usr/lib/qemu-server/qmextract"; @@ -4793,21 +5780,40 @@ my $snapshot_commit = sub { sub snapshot_rollback { my ($vmid, $snapname) = @_; - my $snap; - my $prepare = 1; my $storecfg = PVE::Storage::config(); - my $updatefn = sub { + my $conf = load_config($vmid); - my $conf = load_config($vmid); + my $get_snapshot_config = sub { die "you can't rollback if vm is a template\n" if is_template($conf); - $snap = $conf->{snapshots}->{$snapname}; + my $res = $conf->{snapshots}->{$snapname}; - die "snapshot '$snapname' does not exist\n" if !defined($snap); + die "snapshot '$snapname' does not exist\n" if !defined($res); + + return $res; + }; + + my $snap = &$get_snapshot_config(); + + foreach_drive($snap, sub { + my ($ds, $drive) = @_; + + return if drive_is_cdrom($drive); + + my $volid = $drive->{file}; + + PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname); + }); + + my $updatefn = sub { + + $conf = load_config($vmid); + + $snap = &$get_snapshot_config(); die "unable to rollback to incomplete snapshot (snapstate = $snap->{snapstate})\n" if $snap->{snapstate}; @@ -4887,17 +5893,44 @@ my $savevm_wait = sub { } }; +sub do_snapshots_with_qemu { + my ($storecfg, $volid) = @_; + + my $storage_name = PVE::Storage::parse_volume_id($volid); + + if ($qemu_snap_storage->{$storecfg->{ids}->{$storage_name}->{type}} + && !$storecfg->{ids}->{$storage_name}->{krbd}){ + return 1; + } + + if ($volid =~ m/\.(qcow2|qed)$/){ + return 1; + } + + return undef; +} + sub snapshot_create { - my ($vmid, $snapname, $save_vmstate, $freezefs, $comment) = @_; + my ($vmid, $snapname, $save_vmstate, $comment) = @_; my $snap = &$snapshot_prepare($vmid, $snapname, $save_vmstate, $comment); - $freezefs = $save_vmstate = 0 if !$snap->{vmstate}; # vm is not running + $save_vmstate = 0 if !$snap->{vmstate}; # vm is not running - my $drivehash = {}; + my $config = load_config($vmid); my $running = check_running($vmid); + my $freezefs = $running && $config->{agent}; + $freezefs = 0 if $snap->{vmstate}; # not needed if we save RAM + + my $drivehash = {}; + + if ($freezefs) { + eval { vm_mon_cmd($vmid, "guest-fsfreeze-freeze"); }; + warn "guest-fsfreeze-freeze problems - $@" if $@; + } + eval { # create internal snapshots of all drives @@ -4913,8 +5946,6 @@ sub snapshot_create { } }; - qga_freezefs($vmid) if $running && $freezefs; - foreach_drive($snap, sub { my ($ds, $drive) = @_; @@ -4929,14 +5960,16 @@ sub snapshot_create { }; my $err = $@; - eval { qga_unfreezefs($vmid) if $running && $freezefs; }; - warn $@ if $@; + if ($running) { + eval { vm_mon_cmd($vmid, "savevm-end") }; + warn $@ if $@; - eval { vm_mon_cmd($vmid, "savevm-end") if $running; }; - warn $@ if $@; + if ($freezefs) { + eval { vm_mon_cmd($vmid, "guest-fsfreeze-thaw"); }; + warn "guest-fsfreeze-thaw problems - $@" if $@; + } - # savevm-end is async, we need to wait - if ($running) { + # savevm-end is async, we need to wait for (;;) { my $stat = vm_mon_cmd_nocheck($vmid, "query-savevm"); if (!$stat->{bytes}) { @@ -5156,10 +6189,8 @@ sub qemu_img_convert { sub qemu_img_format { my ($scfg, $volname) = @_; - if ($scfg->{path} && $volname =~ m/\.(raw|qcow2|qed|vmdk)$/) { + if ($scfg->{path} && $volname =~ m/\.(raw|cow|qcow|qcow2|qed|vmdk|cloop)$/) { return $1; - } elsif ($scfg->{type} eq 'iscsi') { - return "host_device"; } else { return "raw"; } @@ -5168,70 +6199,52 @@ sub qemu_img_format { sub qemu_drive_mirror { my ($vmid, $drive, $dst_volid, $vmiddst) = @_; - my $count = 0; - my $old_len = 0; - my $frozen = undef; - my $maxwait = 120; - my $storecfg = PVE::Storage::config(); my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid); my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid); - my $format; - if ($dst_volname =~ m/\.(raw|qcow2)$/){ - $format = $1; - } + my $format = qemu_img_format($dst_scfg, $dst_volname); my $dst_path = PVE::Storage::path($storecfg, $dst_volid); my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $dst_path }; $opts->{format} = $format if $format; - #fixme : sometime drive-mirror timeout, but works fine after. - # (I have see the problem with big volume > 200GB), so we need to eval - eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; - # ignore errors here + print "drive mirror is starting (scanning bitmap) : this step can take some minutes/hours, depend of disk size and storage speed\n"; eval { + vm_mon_cmd($vmid, "drive-mirror", %$opts); while (1) { my $stats = vm_mon_cmd($vmid, "query-block-jobs"); my $stat = @$stats[0]; die "mirroring job seem to have die. Maybe do you have bad sectors?" if !$stat; die "error job is not mirroring" if $stat->{type} ne "mirror"; - my $transferred = $stat->{offset}; - my $total = $stat->{len}; - my $remaining = $total - $transferred; - my $percent = sprintf "%.2f", ($transferred * 100 / $total); my $busy = $stat->{busy}; + my $ready = $stat->{ready}; - print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy\n"; + if (my $total = $stat->{len}) { + my $transferred = $stat->{offset} || 0; + my $remaining = $total - $transferred; + my $percent = sprintf "%.2f", ($transferred * 100 / $total); - if ($stat->{len} == $stat->{offset}) { - if ($busy eq 'false') { + print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n"; + } - last if $vmiddst != $vmid; - # try to switch the disk if source and destination are on the same guest - eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") }; - last if !$@; - die $@ if $@ !~ m/cannot be completed/; - } + if ($stat->{ready} eq 'true') { - if ($count > $maxwait) { - # if too much writes to disk occurs at the end of migration - #the disk needs to be freezed to be able to complete the migration - vm_suspend($vmid,1); - $frozen = 1; - } - $count ++ + last if $vmiddst != $vmid; + + # try to switch the disk if source and destination are on the same guest + eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") }; + last if !$@; + die $@ if $@ !~ m/cannot be completed/; } - $old_len = $stat->{offset}; sleep 1; } - vm_resume($vmid, 1) if $frozen; }; my $err = $@; @@ -5273,7 +6286,8 @@ sub clone_disk { my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); if (!$format) { - $format = $drive->{format} || $defFormat; + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + $format = qemu_img_format($scfg, $volname); } # test if requested format is supported - else use default @@ -5286,6 +6300,8 @@ sub clone_disk { $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $format, undef, ($size/1024)); push @$newvollist, $newvolid; + PVE::Storage::activate_volumes($storecfg, $newvollist); + if (!$running || $snapname) { qemu_img_convert($drive->{file}, $newvolid, $size, $snapname); } else { @@ -5308,7 +6324,7 @@ sub get_current_qemu_machine { my ($vmid) = @_; my $cmd = { execute => 'query-machines', arguments => {} }; - my $res = PVE::QemuServer::vm_qmp_command($vmid, $cmd); + my $res = vm_qmp_command($vmid, $cmd); my ($current, $default); foreach my $e (@$res) { @@ -5320,6 +6336,28 @@ sub get_current_qemu_machine { return $current || $default || 'pc'; } +sub qemu_machine_feature_enabled { + my ($machine, $kvmver, $version_major, $version_minor) = @_; + + my $current_major; + my $current_minor; + + if ($machine && $machine =~ m/^(pc(-i440fx|-q35)?-(\d+)\.(\d+))/) { + + $current_major = $3; + $current_minor = $4; + + } elsif ($kvmver =~ m/^(\d+)\.(\d+)/) { + + $current_major = $1; + $current_minor = $2; + } + + return 1 if $current_major >= $version_major && $current_minor >= $version_minor; + + +} + sub lspci { my $devices = {}; @@ -5333,4 +6371,108 @@ sub lspci { return $devices; } +sub vm_iothreads_list { + my ($vmid) = @_; + + my $res = vm_mon_cmd($vmid, 'query-iothreads'); + + my $iothreads = {}; + foreach my $iothread (@$res) { + $iothreads->{ $iothread->{id} } = $iothread->{"thread-id"}; + } + + return $iothreads; +} + +sub scsihw_infos { + my ($conf, $drive) = @_; + + my $maxdev = 0; + + if ($conf->{scsihw} && ($conf->{scsihw} =~ m/^lsi/)) { + $maxdev = 7; + } elsif ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) { + $maxdev = 1; + } else { + $maxdev = 256; + } + + my $controller = int($drive->{index} / $maxdev); + my $controller_prefix = ($conf->{scsihw} && $conf->{scsihw} eq 'virtio-scsi-single') ? "virtioscsi" : "scsihw"; + + return ($maxdev, $controller, $controller_prefix); +} + +# bash completion helper + +sub complete_backup_archives { + my ($cmdname, $pname, $cvalue) = @_; + + my $cfg = PVE::Storage::config(); + + my $storeid; + + if ($cvalue =~ m/^([^:]+):/) { + $storeid = $1; + } + + my $data = PVE::Storage::template_list($cfg, $storeid, 'backup'); + + my $res = []; + foreach my $id (keys %$data) { + foreach my $item (@{$data->{$id}}) { + next if $item->{format} !~ m/^vma\.(gz|lzo)$/; + push @$res, $item->{volid} if defined($item->{volid}); + } + } + + return $res; +} + +my $complete_vmid_full = sub { + my ($running) = @_; + + my $idlist = vmstatus(); + + my $res = []; + + foreach my $id (keys %$idlist) { + my $d = $idlist->{$id}; + if (defined($running)) { + next if $d->{template}; + next if $running && $d->{status} ne 'running'; + next if !$running && $d->{status} eq 'running'; + } + push @$res, $id; + + } + return $res; +}; + +sub complete_vmid { + return &$complete_vmid_full(); +} + +sub complete_vmid_stopped { + return &$complete_vmid_full(0); +} + +sub complete_vmid_running { + return &$complete_vmid_full(1); +} + +sub complete_storage { + + my $cfg = PVE::Storage::config(); + my $ids = $cfg->{ids}; + + my $res = []; + foreach my $sid (keys %$ids) { + next if !PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1); + push @$res, $sid; + } + + return $res; +} + 1;