X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=PVE%2FQemuServer.pm;h=81f05843efdbbf681bb4be474d445bb5bc92269f;hb=1f720e28c739bcb0fa8ce8c61a066d7d49edbee6;hp=2fe95b1e4bc8174e010c42176a136e5aff8dc321;hpb=ba4eea15e6bd4387433a35dc891d414375994e73;p=qemu-server.git diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index 2fe95b1..81f0584 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -31,6 +31,8 @@ use PVE::QMPClient; use PVE::RPCEnvironment; use Time::HiRes qw(gettimeofday); +my $qemu_snap_storage = {rbd => 1, sheepdog => 1}; + my $cpuinfo = PVE::ProcFSTools::read_cpuinfo(); # Note about locking: we use flock on the config file protect @@ -65,76 +67,12 @@ PVE::JSONSchema::register_standard_option('pve-snapshot-name', { #no warnings 'redefine'; -unless(defined(&_VZSYSCALLS_H_)) { - eval 'sub _VZSYSCALLS_H_ () {1;}' unless defined(&_VZSYSCALLS_H_); - require 'sys/syscall.ph'; - if(defined(&__x86_64__)) { - eval 'sub __NR_fairsched_vcpus () {499;}' unless defined(&__NR_fairsched_vcpus); - eval 'sub __NR_fairsched_mknod () {504;}' unless defined(&__NR_fairsched_mknod); - eval 'sub __NR_fairsched_rmnod () {505;}' unless defined(&__NR_fairsched_rmnod); - eval 'sub __NR_fairsched_chwt () {506;}' unless defined(&__NR_fairsched_chwt); - eval 'sub __NR_fairsched_mvpr () {507;}' unless defined(&__NR_fairsched_mvpr); - eval 'sub __NR_fairsched_rate () {508;}' unless defined(&__NR_fairsched_rate); - eval 'sub __NR_setluid () {501;}' unless defined(&__NR_setluid); - eval 'sub __NR_setublimit () {502;}' unless defined(&__NR_setublimit); - } - elsif(defined( &__i386__) ) { - eval 'sub __NR_fairsched_mknod () {500;}' unless defined(&__NR_fairsched_mknod); - eval 'sub __NR_fairsched_rmnod () {501;}' unless defined(&__NR_fairsched_rmnod); - eval 'sub __NR_fairsched_chwt () {502;}' unless defined(&__NR_fairsched_chwt); - eval 'sub __NR_fairsched_mvpr () {503;}' unless defined(&__NR_fairsched_mvpr); - eval 'sub __NR_fairsched_rate () {504;}' unless defined(&__NR_fairsched_rate); - eval 'sub __NR_fairsched_vcpus () {505;}' unless defined(&__NR_fairsched_vcpus); - eval 'sub __NR_setluid () {511;}' unless defined(&__NR_setluid); - eval 'sub __NR_setublimit () {512;}' unless defined(&__NR_setublimit); - } else { - die("no fairsched syscall for this arch"); - } - require 'asm/ioctl.ph'; - eval 'sub KVM_GET_API_VERSION () { &_IO(0xAE, 0x);}' unless defined(&KVM_GET_API_VERSION); -} - -sub fairsched_mknod { - my ($parent, $weight, $desired) = @_; - - return syscall(&__NR_fairsched_mknod, int($parent), int($weight), int($desired)); -} - -sub fairsched_rmnod { - my ($id) = @_; - - return syscall(&__NR_fairsched_rmnod, int($id)); -} - -sub fairsched_mvpr { - my ($pid, $newid) = @_; - - return syscall(&__NR_fairsched_mvpr, int($pid), int($newid)); -} - -sub fairsched_vcpus { - my ($id, $vcpus) = @_; - - return syscall(&__NR_fairsched_vcpus, int($id), int($vcpus)); -} +sub cgroups_write { + my ($controller, $vmid, $option, $value) = @_; -sub fairsched_rate { - my ($id, $op, $rate) = @_; + my $path = "/sys/fs/cgroup/$controller/qemu.slice/$vmid.scope/$option"; + PVE::ProcFSTools::write_proc_entry($path, $value); - return syscall(&__NR_fairsched_rate, int($id), int($op), int($rate)); -} - -use constant FAIRSCHED_SET_RATE => 0; -use constant FAIRSCHED_DROP_RATE => 1; -use constant FAIRSCHED_GET_RATE => 2; - -sub fairsched_cpulimit { - my ($id, $limit) = @_; - - my $cpulim1024 = int($limit * 1024 / 100); - my $op = $cpulim1024 ? FAIRSCHED_SET_RATE : FAIRSCHED_DROP_RATE; - - return fairsched_rate($id, $op, $cpulim1024); } my $nodename = PVE::INotify::nodename(); @@ -152,12 +90,6 @@ mkdir $lock_dir; my $pcisysfs = "/sys/bus/pci"; my $confdesc = { - iothread => { - optional => 1, - type => 'boolean', - description => "Enable iothread dataplane.", - default => 0, - }, onboot => { optional => 1, type => 'boolean', @@ -190,9 +122,10 @@ my $confdesc = { }, cpulimit => { optional => 1, - type => 'integer', - description => "Limit of CPU usage in per cent. Note if the computer has 2 CPUs, it has total of 200% CPU time. Value '0' indicates no CPU limit.\n\nNOTE: This option is currently ignored.", + type => 'number', + description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.", minimum => 0, + maximum => 128, default => 0, }, cpuunits => { @@ -240,7 +173,7 @@ my $confdesc = { optional => 1, type => 'string', description => "scsi controller model", - enum => [qw(lsi lsi53c810 virtio-scsi-pci megasas pvscsi)], + enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)], default => 'lsi', }, description => { @@ -373,12 +306,7 @@ EODESC pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)', default => 'now', }, - startup => { - optional => 1, - type => 'string', format => 'pve-qm-startup', - typetext => '[[order=]\d+] [,up=\d+] [,down=\d+] ', - description => "Startup and shutdown behavior. Order is a non-negative number defining the general startup order. Shutdown in done with reverse ordering. Additionally you can set the 'up' or 'down' delay in seconds, which specifies a delay to wait before the next VM is started or stopped.", - }, + startup => get_standard_option('pve-startup-order'), template => { optional => 1, type => 'boolean', @@ -424,7 +352,7 @@ EODESCR optional => 1, description => "Emulated CPU type.", type => 'string', - enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom Conroe Penryn Nehalem Westmere SandyBridge Haswell Broadwell Opteron_G1 Opteron_G2 Opteron_G3 Opteron_G4 Opteron_G5 host) ], + enum => [ qw(486 athlon pentium pentium2 pentium3 coreduo core2duo kvm32 kvm64 qemu32 qemu64 phenom Conroe Penryn Nehalem Westmere SandyBridge IvyBridge Haswell Broadwell Opteron_G1 Opteron_G2 Opteron_G3 Opteron_G4 Opteron_G5 host) ], default => 'kvm64', }, parent => get_standard_option('pve-snapshot-name', { @@ -445,7 +373,7 @@ EODESCR machine => { description => "Specific the Qemu machine type.", type => 'string', - pattern => '(pc|pc(-i440fx)?-\d+\.\d+|q35|pc-q35-\d+\.\d+)', + pattern => '(pc|pc(-i440fx)?-\d+\.\d+(\.pxe)?|q35|pc-q35-\d+\.\d+(\.pxe)?)', maxLength => 40, optional => 1, }, @@ -456,6 +384,12 @@ EODESCR maxLength => 256, optional => 1, }, + protection => { + optional => 1, + type => 'boolean', + description => "Sets the protection flag of the VM. This will prevent the remove operation.", + default => 0, + }, }; # what about other qemu settings ? @@ -547,7 +481,7 @@ my $drivename_hash; my $idedesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,serial=serial][,model=model]', description => "Use volume as IDE hard disk or CD-ROM (n is 0 to " .($MAX_IDE_DISKS -1) . ").", }; PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc); @@ -555,7 +489,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-ide", $idedesc); my $scsidesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,iothread=on] [,queues=] [,serial=serial]', description => "Use volume as SCSI hard disk or CD-ROM (n is 0 to " . ($MAX_SCSI_DISKS - 1) . ").", }; PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); @@ -563,7 +497,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-scsi", $scsidesc); my $satadesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,serial=serial]', description => "Use volume as SATA hard disk or CD-ROM (n is 0 to " . ($MAX_SATA_DISKS - 1). ").", }; PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc); @@ -571,7 +505,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-sata", $satadesc); my $virtiodesc = { optional => 1, type => 'string', format => 'pve-qm-drive', - typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on]', + typetext => '[volume=]volume,] [,media=cdrom|disk] [,cyls=c,heads=h,secs=s[,trans=t]] [,snapshot=on|off] [,cache=none|writethrough|writeback|unsafe|directsync] [,format=f] [,backup=yes|no] [,rerror=ignore|report|stop] [,werror=enospc|ignore|report|stop] [,aio=native|threads] [,discard=ignore|on] [,iothread=on] [,serial=serial]', description => "Use volume as VIRTIO hard disk (n is 0 to " . ($MAX_VIRTIO_DISKS - 1) . ").", }; PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc); @@ -600,7 +534,7 @@ PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc); my $hostpcidesc = { optional => 1, type => 'string', format => 'pve-qm-hostpci', - typetext => "[host=]HOSTPCIDEVICE [,driver=kvm|vfio] [,rombar=on|off] [,pcie=0|1] [,x-vga=on|off]", + typetext => "[host=]HOSTPCIDEVICE [,rombar=on|off] [,pcie=0|1] [,x-vga=on|off]", description => < </dev/null`; - if ($tmp =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)[,\s]/) { + if ($tmp =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) { $kvm_user_version = $2; } @@ -879,7 +813,7 @@ sub parse_hotplug_features { my $res = {}; return $res if $data eq '0'; - + $data = $confdesc->{hotplug}->{default} if $data eq '1'; foreach my $feature (PVE::Tools::split_list($data)) { @@ -940,7 +874,8 @@ my $format_size = sub { # ideX = [volume=]volume-id[,media=d][,cyls=c,heads=h,secs=s[,trans=t]] # [,snapshot=on|off][,cache=on|off][,format=f][,backup=yes|no] # [,rerror=ignore|report|stop][,werror=enospc|ignore|report|stop] -# [,aio=native|threads][,discard=ignore|on] +# [,aio=native|threads][,discard=ignore|on][,iothread=on] +# [,serial=serial][,model=model] sub parse_drive { my ($key, $data) = @_; @@ -961,7 +896,7 @@ sub parse_drive { foreach my $p (split (/,/, $data)) { next if $p =~ m/^\s*$/; - if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio|bps|mbps|mbps_max|bps_rd|mbps_rd|mbps_rd_max|bps_wr|mbps_wr|mbps_wr_max|iops|iops_max|iops_rd|iops_rd_max|iops_wr|iops_wr_max|size|discard)=(.+)$/) { + if ($p =~ m/^(file|volume|cyls|heads|secs|trans|media|snapshot|cache|format|rerror|werror|backup|aio|bps|mbps|mbps_max|bps_rd|mbps_rd|mbps_rd_max|bps_wr|mbps_wr|mbps_wr_max|iops|iops_max|iops_rd|iops_rd_max|iops_wr|iops_wr_max|size|discard|iothread|queues|serial|model)=(.+)$/) { my ($k, $v) = ($1, $2); $k = 'file' if $k eq 'volume'; @@ -985,10 +920,6 @@ sub parse_drive { return undef if !$res->{file}; - if($res->{file} =~ m/\.(raw|cow|qcow|qcow2|vmdk|cloop)$/){ - $res->{format} = $1; - } - return undef if $res->{cache} && $res->{cache} !~ m/^(off|none|writethrough|writeback|unsafe|directsync)$/; return undef if $res->{snapshot} && $res->{snapshot} !~ m/^(on|off)$/; @@ -997,12 +928,14 @@ sub parse_drive { return undef if $res->{secs} && $res->{secs} !~ m/^\d+$/; return undef if $res->{media} && $res->{media} !~ m/^(disk|cdrom)$/; return undef if $res->{trans} && $res->{trans} !~ m/^(none|lba|auto)$/; - return undef if $res->{format} && $res->{format} !~ m/^(raw|cow|qcow|qcow2|vmdk|cloop)$/; + return undef if $res->{format} && $res->{format} !~ m/^(raw|cow|qcow|qed|qcow2|vmdk|cloop)$/; return undef if $res->{rerror} && $res->{rerror} !~ m/^(ignore|report|stop)$/; return undef if $res->{werror} && $res->{werror} !~ m/^(enospc|ignore|report|stop)$/; return undef if $res->{backup} && $res->{backup} !~ m/^(yes|no)$/; return undef if $res->{aio} && $res->{aio} !~ m/^(native|threads)$/; return undef if $res->{discard} && $res->{discard} !~ m/^(ignore|on)$/; + return undef if $res->{iothread} && $res->{iothread} !~ m/^(on)$/; + return undef if $res->{queues} && ($res->{queues} !~ m/^\d+$/ || $res->{queues} < 2); return undef if $res->{mbps_rd} && $res->{mbps}; return undef if $res->{mbps_wr} && $res->{mbps}; @@ -1025,7 +958,6 @@ sub parse_drive { return undef if $res->{iops_wr} && $res->{iops_wr} !~ m/^\d+$/; return undef if $res->{iops_wr_max} && $res->{iops_wr_max} !~ m/^\d+$/; - if ($res->{size}) { return undef if !defined($res->{size} = &$parse_size($res->{size})); } @@ -1044,13 +976,13 @@ sub parse_drive { return $res; } -my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio discard iops iops_rd iops_wr iops_max iops_rd_max iops_wr_max); +my @qemu_drive_options = qw(heads secs cyls trans media format cache snapshot rerror werror aio discard iops iops_rd iops_wr iops_max iops_rd_max iops_wr_max serial); sub print_drive { my ($vmid, $drive) = @_; my $opts = ''; - foreach my $o (@qemu_drive_options, 'mbps', 'mbps_rd', 'mbps_wr', 'mbps_max', 'mbps_rd_max', 'mbps_wr_max', 'backup') { + foreach my $o (@qemu_drive_options, 'mbps', 'mbps_rd', 'mbps_wr', 'mbps_max', 'mbps_rd_max', 'mbps_wr_max', 'backup', 'iothread', 'queues') { $opts .= ",$o=$drive->{$o}" if $drive->{$o}; } @@ -1058,6 +990,10 @@ sub print_drive { $opts .= ",size=" . &$format_size($drive->{size}); } + if (my $model = $drive->{model}) { + $opts .= ",model=$model"; + } + return "$drive->{file}$opts"; } @@ -1148,10 +1084,10 @@ sub print_drivedevice_full { if ($drive->{interface} eq 'virtio') { my $pciaddr = print_pci_addr("$drive->{interface}$drive->{index}", $bridges); $device = "virtio-blk-pci,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}$pciaddr"; - $device .= ",iothread=iothread0" if $conf->{iothread}; + $device .= ",iothread=iothread-$drive->{interface}$drive->{index}" if $drive->{iothread}; } elsif ($drive->{interface} eq 'scsi') { - $maxdev = ($conf->{scsihw} && ($conf->{scsihw} !~ m/^lsi/)) ? 256 : 7; - my $controller = int($drive->{index} / $maxdev); + + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); my $unit = $drive->{index} % $maxdev; my $devicetype = 'hd'; my $path = ''; @@ -1178,9 +1114,9 @@ sub print_drivedevice_full { } if (!$conf->{scsihw} || ($conf->{scsihw} =~ m/^lsi/)){ - $device = "scsi-$devicetype,bus=scsihw$controller.0,scsi-id=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; + $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; } else { - $device = "scsi-$devicetype,bus=scsihw$controller.0,channel=0,scsi-id=0,lun=$drive->{index},drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; + $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0,lun=$drive->{index},drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; } } elsif ($drive->{interface} eq 'ide'){ @@ -1190,6 +1126,9 @@ sub print_drivedevice_full { my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd"; $device = "ide-$devicetype,bus=ide.$controller,unit=$unit,drive=drive-$drive->{interface}$drive->{index},id=$drive->{interface}$drive->{index}"; + if ($devicetype eq 'hd' && (my $model = $drive->{model})) { + $device .= ",model=$model"; + } } elsif ($drive->{interface} eq 'sata'){ my $controller = int($drive->{index} / $MAX_SATA_DISKS); my $unit = $drive->{index} % $MAX_SATA_DISKS; @@ -1223,34 +1162,54 @@ sub get_initiator_name { sub print_drive_full { my ($storecfg, $vmid, $drive) = @_; + my $path; + my $volid = $drive->{file}; + my $format; + + if (drive_is_cdrom($drive)) { + $path = get_iso_path($storecfg, $vmid, $volid); + } else { + my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1); + if ($storeid) { + $path = PVE::Storage::path($storecfg, $volid); + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + $format = qemu_img_format($scfg, $volname); + } else { + $path = $volid; + } + } + my $opts = ''; foreach my $o (@qemu_drive_options) { next if $o eq 'bootindex'; $opts .= ",$o=$drive->{$o}" if $drive->{$o}; } + $opts .= ",format=$format" if $format && !$drive->{format}; + foreach my $o (qw(bps bps_rd bps_wr)) { my $v = $drive->{"m$o"}; $opts .= ",$o=" . int($v*1024*1024) if $v; } - # use linux-aio by default (qemu default is threads) - $opts .= ",aio=native" if !$drive->{aio}; + my $cache_direct = 0; - my $path; - my $volid = $drive->{file}; - if (drive_is_cdrom($drive)) { - $path = get_iso_path($storecfg, $vmid, $volid); - } else { - if ($volid =~ m|^/|) { - $path = $volid; + if (my $cache = $drive->{cache}) { + $cache_direct = $cache =~ /^(?:off|none|directsync)$/; + } elsif (!drive_is_cdrom($drive)) { + $opts .= ",cache=none"; + $cache_direct = 1; + } + + # aio native works only with O_DIRECT + if (!$drive->{aio}) { + if($cache_direct) { + $opts .= ",aio=native"; } else { - $path = PVE::Storage::path($storecfg, $volid); + $opts .= ",aio=threads"; } } - $opts .= ",cache=none" if !$drive->{cache} && !drive_is_cdrom($drive); - my $detectzeroes = $drive->{discard} ? "unmap" : "on"; $opts .= ",detect-zeroes=$detectzeroes" if !drive_is_cdrom($drive); @@ -1260,7 +1219,7 @@ sub print_drive_full { } sub print_netdevice_full { - my ($vmid, $conf, $net, $netid, $bridges) = @_; + my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files) = @_; my $bootorder = $conf->{boot} || $confdesc->{boot}->{default}; @@ -1277,11 +1236,28 @@ sub print_netdevice_full { $tmpstr .= ",vectors=$vectors,mq=on"; } $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ; + + if ($use_old_bios_files) { + my $romfile; + if ($device eq 'virtio-net-pci') { + $romfile = 'pxe-virtio.rom'; + } elsif ($device eq 'e1000') { + $romfile = 'pxe-e1000.rom'; + } elsif ($device eq 'ne2k') { + $romfile = 'pxe-ne2k_pci.rom'; + } elsif ($device eq 'pcnet') { + $romfile = 'pxe-pcnet.rom'; + } elsif ($device eq 'rtl8139') { + $romfile = 'pxe-rtl8139.rom'; + } + $tmpstr .= ",romfile=$romfile" if $romfile; + } + return $tmpstr; } sub print_netdev_full { - my ($vmid, $conf, $net, $netid) = @_; + my ($vmid, $conf, $net, $netid, $hotplug) = @_; my $i = ''; if ($netid =~ m/^net(\d+)$/) { @@ -1302,9 +1278,10 @@ sub print_netdev_full { my $vmname = $conf->{name} || "vm$vmid"; my $netdev = ""; + my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge"; if ($net->{bridge}) { - $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam"; + $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script,downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam"; } else { $netdev = "type=user,id=$netid,hostname=$vmname"; } @@ -1367,8 +1344,6 @@ sub parse_hostpci { my $pcidevices = lspci($2); $res->{pciid} = $pcidevices->{$2}; } - } elsif ($kv =~ m/^driver=(kvm|vfio)$/) { - $res->{driver} = $1; } elsif ($kv =~ m/^rombar=(on|off)$/) { $res->{rombar} = $1; } elsif ($kv =~ m/^x-vga=(on|off)$/) { @@ -1431,6 +1406,7 @@ sub print_net { $res .= ",tag=$net->{tag}" if $net->{tag}; $res .= ",firewall=1" if $net->{firewall}; $res .= ",link_down=1" if $net->{link_down}; + $res .= ",queues=$net->{queues}" if $net->{queues}; return $res; } @@ -1480,29 +1456,35 @@ sub vm_is_volid_owner { return undef; } +sub split_flagged_list { + my $text = shift || ''; + $text =~ s/[,;]/ /g; + $text =~ s/^\s+//; + return { map { /^(!?)(.*)$/ && ($2, $1) } ($text =~ /\S+/g) }; +} + +sub join_flagged_list { + my ($how, $lst) = @_; + join $how, map { $lst->{$_} . $_ } keys %$lst; +} + sub vmconfig_delete_pending_option { - my ($conf, $key) = @_; + my ($conf, $key, $force) = @_; delete $conf->{pending}->{$key}; - my $pending_delete_hash = { $key => 1 }; - foreach my $opt (PVE::Tools::split_list($conf->{pending}->{delete})) { - $pending_delete_hash->{$opt} = 1; - } - $conf->{pending}->{delete} = join(',', keys %$pending_delete_hash); + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + $pending_delete_hash->{$key} = $force ? '!' : ''; + $conf->{pending}->{delete} = join_flagged_list(',', $pending_delete_hash); } sub vmconfig_undelete_pending_option { my ($conf, $key) = @_; - my $pending_delete_hash = {}; - foreach my $opt (PVE::Tools::split_list($conf->{pending}->{delete})) { - $pending_delete_hash->{$opt} = 1; - } + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); delete $pending_delete_hash->{$key}; - my @keylist = keys %$pending_delete_hash; - if (scalar(@keylist)) { - $conf->{pending}->{delete} = join(',', @keylist); + if (%$pending_delete_hash) { + $conf->{pending}->{delete} = join_flagged_list(',', $pending_delete_hash); } else { delete $conf->{pending}->{delete}; } @@ -1531,19 +1513,18 @@ sub vmconfig_cleanup_pending { } } - # remove delete if option is not set + my $current_delete_hash = split_flagged_list($conf->{pending}->{delete}); my $pending_delete_hash = {}; - foreach my $opt (PVE::Tools::split_list($conf->{pending}->{delete})) { + while (my ($opt, $force) = each %$current_delete_hash) { if (defined($conf->{$opt})) { - $pending_delete_hash->{$opt} = 1; + $pending_delete_hash->{$opt} = $force; } else { $changes = 1; } } - my @keylist = keys %$pending_delete_hash; - if (scalar(@keylist)) { - $conf->{pending}->{delete} = join(',', @keylist); + if (%$pending_delete_hash) { + $conf->{pending}->{delete} = join_flagged_list(',', $pending_delete_hash); } else { delete $conf->{pending}->{delete}; } @@ -1691,41 +1672,6 @@ sub parse_watchdog { return $res; } -PVE::JSONSchema::register_format('pve-qm-startup', \&verify_startup); -sub verify_startup { - my ($value, $noerr) = @_; - - return $value if parse_startup($value); - - return undef if $noerr; - - die "unable to parse startup options\n"; -} - -sub parse_startup { - my ($value) = @_; - - return undef if !$value; - - my $res = {}; - - foreach my $p (split(/,/, $value)) { - next if $p =~ m/^\s*$/; - - if ($p =~ m/^(order=)?(\d+)$/) { - $res->{order} = $2; - } elsif ($p =~ m/^up=(\d+)$/) { - $res->{up} = $1; - } elsif ($p =~ m/^down=(\d+)$/) { - $res->{down} = $1; - } else { - return undef; - } - } - - return $res; -} - sub parse_usb_device { my ($value) = @_; @@ -1964,7 +1910,7 @@ sub parse_vm_config { my $vmid = $1; my $conf = $res; - my $descr = ''; + my $descr; my $section = ''; my @lines = split(/\n/, $raw); @@ -1973,25 +1919,33 @@ sub parse_vm_config { if ($line =~ m/^\[PENDING\]\s*$/i) { $section = 'pending'; - $conf->{description} = $descr if $descr; - $descr = ''; + if (defined($descr)) { + $descr =~ s/\s+$//; + $conf->{description} = $descr; + } + $descr = undef; $conf = $res->{$section} = {}; next; } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) { $section = $1; - $conf->{description} = $descr if $descr; - $descr = ''; + if (defined($descr)) { + $descr =~ s/\s+$//; + $conf->{description} = $descr; + } + $descr = undef; $conf = $res->{snapshots}->{$section} = {}; next; } if ($line =~ m/^\#(.*)\s*$/) { + $descr = '' if !defined($descr); $descr .= PVE::Tools::decode_text($1) . "\n"; next; } if ($line =~ m/^(description):\s*(.*\S)\s*$/) { + $descr = '' if !defined($descr); $descr .= PVE::Tools::decode_text($2); } elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) { $conf->{snapstate} = $1; @@ -2034,8 +1988,10 @@ sub parse_vm_config { } } - $conf->{description} = $descr if $descr; - + if (defined($descr)) { + $descr =~ s/\s+$//; + $conf->{description} = $descr; + } delete $res->{snapstate}; # just to be sure return $res; @@ -2106,14 +2062,19 @@ sub write_vm_config { } my $generate_raw_config = sub { - my ($conf) = @_; + my ($conf, $pending) = @_; my $raw = ''; # add description as comment to top of file - my $descr = $conf->{description} || ''; - foreach my $cl (split(/\n/, $descr)) { - $raw .= '#' . PVE::Tools::encode_text($cl) . "\n"; + if (defined(my $descr = $conf->{description})) { + if ($descr) { + foreach my $cl (split(/\n/, $descr)) { + $raw .= '#' . PVE::Tools::encode_text($cl) . "\n"; + } + } else { + $raw .= "#\n" if $pending; + } } foreach my $key (sort keys %$conf) { @@ -2127,7 +2088,7 @@ sub write_vm_config { if (scalar(keys %{$conf->{pending}})){ $raw .= "\n[PENDING]\n"; - $raw .= &$generate_raw_config($conf->{pending}); + $raw .= &$generate_raw_config($conf->{pending}, 1); } foreach my $snapname (sort keys %{$conf->{snapshots}}) { @@ -2197,6 +2158,8 @@ sub check_local_resources { foreach my $k (keys %$conf) { next if $k =~ m/^usb/ && ($conf->{$k} eq 'spice'); + # sockets are safe: they will recreated be on the target side post-migrate + next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket'); $loc_res = 1 if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/; } @@ -2401,6 +2364,7 @@ sub vmstatus { $d->{cpus} = ($conf->{sockets} || 1) * ($conf->{cores} || 1); $d->{cpus} = $cpucount if $d->{cpus} > $cpucount; + $d->{cpus} = $conf->{vcpus} if $conf->{vcpus}; $d->{name} = $conf->{name} || "VM $vmid"; $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024) : 0; @@ -2434,6 +2398,12 @@ sub vmstatus { $d->{netout} += $netdev->{$dev}->{receive}; $d->{netin} += $netdev->{$dev}->{transmit}; + + if ($full) { + $d->{nics}->{$dev}->{netout} = $netdev->{$dev}->{receive}; + $d->{nics}->{$dev}->{netin} = $netdev->{$dev}->{transmit}; + } + } my $ctime = gettimeofday; @@ -2502,6 +2472,7 @@ sub vmstatus { $d->{freemem} = $info->{free_mem}; } + $d->{ballooninfo} = $info; }; my $blockstatscb = sub { @@ -2509,9 +2480,13 @@ sub vmstatus { my $data = $resp->{'return'} || []; my $totalrdbytes = 0; my $totalwrbytes = 0; + for my $blockstat (@$data) { $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes}; $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes}; + + $blockstat->{device} =~ s/drive-//; + $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats}; } $res->{$vmid}->{diskread} = $totalrdbytes; $res->{$vmid}->{diskwrite} = $totalwrbytes; @@ -2571,6 +2546,27 @@ sub foreach_dimm { } } +sub foreach_reverse_dimm { + my ($conf, $vmid, $memory, $sockets, $func) = @_; + + my $dimm_id = 253; + my $current_size = 4177920; + my $dimm_size = 65536; + return if $current_size == $memory; + + for (my $j = 0; $j < 8; $j++) { + for (my $i = 0; $i < 32; $i++) { + my $name = "dimm${dimm_id}"; + $dimm_id--; + my $numanode = $i % $sockets; + $current_size -= $dimm_size; + &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory); + return $current_size if $current_size <= $memory; + } + $dimm_size /= 2; + } +} + sub foreach_drive { my ($conf, $func) = @_; @@ -2649,6 +2645,25 @@ sub config_to_command { my $q35 = machine_type_is_q35($conf); my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); + my $machine_type = $forcemachine || $conf->{machine}; + my $use_old_bios_files = undef; + ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type); + + my $cpuunits = defined($conf->{cpuunits}) ? + $conf->{cpuunits} : $defaults->{cpuunits}; + + push @$cmd, '/usr/bin/systemd-run'; + push @$cmd, '--scope'; + push @$cmd, '--slice', "qemu"; + push @$cmd, '--unit', $vmid; + # set KillMode=none, so that systemd don't kill those scopes + # at shutdown (pve-manager service should stop the VMs instead) + push @$cmd, '-p', "KillMode=none"; + push @$cmd, '-p', "CPUShares=$cpuunits"; + if ($conf->{cpulimit}) { + my $cpulimit = int($conf->{cpulimit} * 100); + push @$cmd, '-p', "CPUQuota=$cpulimit\%"; + } push @$cmd, '/usr/bin/kvm'; @@ -2671,8 +2686,6 @@ sub config_to_command { push @$cmd, '-smbios', "type=1,$conf->{smbios1}"; } - push @$cmd, '-object', "iothread,id=iothread0" if $conf->{iothread}; - if ($q35) { # the q35 chipset support native usb2, so we enable usb controller # by default for this machine type @@ -2731,13 +2744,11 @@ sub config_to_command { } my $rombar = $d->{rombar} && $d->{rombar} eq 'off' ? ",rombar=0" : ""; - my $driver = $d->{driver} && $d->{driver} eq 'vfio' ? "vfio-pci" : "pci-assign"; my $xvga = $d->{'x-vga'} && $d->{'x-vga'} eq 'on' ? ",x-vga=on" : ""; if ($xvga && $xvga ne '') { push @$cpuFlags, 'kvm=off'; $vga = 'none'; } - $driver = "vfio-pci" if $xvga ne ''; my $pcidevices = $d->{pciid}; my $multifunction = 1 if @$pcidevices > 1; @@ -2748,7 +2759,7 @@ sub config_to_command { $id .= ".$j" if $multifunction; my $addr = $pciaddr; $addr .= ".$j" if $multifunction; - my $devicestr = "$driver,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr"; + my $devicestr = "vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr"; if($j == 0){ $devicestr .= "$rombar$xvga"; @@ -2816,7 +2827,7 @@ sub config_to_command { my $allowed_vcpus = $cpuinfo->{cpus}; - die "MAX $maxcpus vcpus allowed per VM on this node\n" + die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus); push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus"; @@ -2862,8 +2873,14 @@ sub config_to_command { $ost eq 'wvista') { push @$globalFlags, 'kvm-pit.lost_tick_policy=discard'; push @$cmd, '-no-hpet'; - #push @$cpuFlags , 'hv_vapic" if !$nokvm; #fixme, my win2008R2 hang at boot with this - push @$cpuFlags , 'hv_spinlocks=0xffff' if !$nokvm; + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + push @$cpuFlags , 'hv_spinlocks=0x1fff' if !$nokvm; + push @$cpuFlags , 'hv_vapic' if !$nokvm; + push @$cpuFlags , 'hv_time' if !$nokvm; + + } else { + push @$cpuFlags , 'hv_spinlocks=0xffff' if !$nokvm; + } } if ($ost eq 'win7' || $ost eq 'win8') { @@ -2879,7 +2896,6 @@ sub config_to_command { die "No accelerator found!\n" if !$cpuinfo->{hvm}; } - my $machine_type = $forcemachine || $conf->{machine}; if ($machine_type) { push @$machineFlags, "type=${machine_type}"; } @@ -2895,16 +2911,23 @@ sub config_to_command { push @$cpuFlags , '+lahf_lm' if $cpu eq 'kvm64'; - push @$cpuFlags , '+x2apic' if !$nokvm && $conf->{ostype} ne 'solaris'; - - push @$cpuFlags , '-x2apic' if $conf->{ostype} eq 'solaris'; + push @$cpuFlags , '-x2apic' + if $conf->{ostype} && $conf->{ostype} eq 'solaris'; push @$cpuFlags, '+sep' if $cpu eq 'kvm64' || $cpu eq 'kvm32'; + push @$cpuFlags, '-rdtscp' if $cpu =~ m/^Opteron/; + + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + + push @$cpuFlags , '+kvm_pv_unhalt' if !$nokvm; + push @$cpuFlags , '+kvm_pv_eoi' if !$nokvm; + } + + push @$cpuFlags, 'enforce' if $cpu ne 'host' && !$nokvm; + $cpu .= "," . join(',', @$cpuFlags) if scalar(@$cpuFlags); - # Note: enforce needs kernel 3.10, so we do not use it for now - # push @$cmd, '-cpu', "$cpu,enforce"; push @$cmd, '-cpu', $cpu; my $memory = $conf->{memory} || $defaults->{memory}; @@ -3045,9 +3068,11 @@ sub config_to_command { my $pciaddr = print_pci_addr("spice", $bridges); - $spice_port = PVE::Tools::next_spice_port(); + my $nodename = PVE::INotify::nodename(); + my $pfamily = PVE::Tools::get_host_address_family($nodename); + $spice_port = PVE::Tools::next_spice_port($pfamily); - push @$devices, '-spice', "tls-port=${spice_port},addr=127.0.0.1,tls-ciphers=DES-CBC3-SHA,seamless-migration=on"; + push @$devices, '-spice', "tls-port=${spice_port},addr=localhost,tls-ciphers=DES-CBC3-SHA,seamless-migration=on"; push @$devices, '-device', "virtio-serial,id=spice$pciaddr"; push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent"; @@ -3099,12 +3124,29 @@ sub config_to_command { } } + if($drive->{interface} eq 'virtio'){ + push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread}; + } + if ($drive->{interface} eq 'scsi') { - my $maxdev = ($scsihw !~ m/^lsi/) ? 256 : 7; - my $controller = int($drive->{index} / $maxdev); - $pciaddr = print_pci_addr("scsihw$controller", $bridges); - push @$devices, '-device', "$scsihw,id=scsihw$controller$pciaddr" if !$scsicontroller->{$controller}; + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive); + + $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges); + my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw; + + my $iothread = ''; + if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){ + $iothread .= ",iothread=iothread-$controller_prefix$controller"; + push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller"; + } + + my $queues = ''; + if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){ + $queues = ",num_queues=$drive->{queues}"; + } + + push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues" if !$scsicontroller->{$controller}; $scsicontroller->{$controller}=1; } @@ -3135,31 +3177,25 @@ sub config_to_command { my $netdevfull = print_netdev_full($vmid,$conf,$d,"net$i"); push @$devices, '-netdev', $netdevfull; - my $netdevicefull = print_netdevice_full($vmid,$conf,$d,"net$i",$bridges); + my $netdevicefull = print_netdevice_full($vmid, $conf, $d, "net$i", $bridges, $use_old_bios_files); push @$devices, '-device', $netdevicefull; } if (!$q35) { # add pci bridges + if (qemu_machine_feature_enabled ($machine_type, $kvmver, 2, 3)) { + $bridges->{1} = 1; + $bridges->{2} = 1; + } + + $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/; + while (my ($k, $v) = each %$bridges) { $pciaddr = print_pci_addr("pci.$k"); unshift @$devices, '-device', "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" if $k > 0; } } - # hack: virtio with fairsched is unreliable, so we do not use fairsched - # when the VM uses virtio devices. - if (!$use_virtio && $have_ovz) { - - my $cpuunits = defined($conf->{cpuunits}) ? - $conf->{cpuunits} : $defaults->{cpuunits}; - - push @$cmd, '-cpuunits', $cpuunits if $cpuunits; - - # fixme: cpulimit is currently ignored - #push @$cmd, '-cpulimit', $conf->{cpulimit} if $conf->{cpulimit}; - } - # add custom args if ($conf->{args}) { my $aa = PVE::Tools::split_args($conf->{args}); @@ -3256,6 +3292,8 @@ sub vm_deviceplug { } elsif ($deviceid =~ m/^(virtio)(\d+)$/) { + qemu_iothread_add($vmid, $deviceid, $device); + qemu_driveadd($storecfg, $vmid, $device); my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); @@ -3267,11 +3305,23 @@ sub vm_deviceplug { die $err; } - } elsif ($deviceid =~ m/^(scsihw)(\d+)$/) { + } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) { + my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi"; my $pciaddr = print_pci_addr($deviceid); - my $devicefull = "$scsihw,id=$deviceid$pciaddr"; + my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw; + + my $devicefull = "$scsihw_type,id=$deviceid$pciaddr"; + + if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) { + qemu_iothread_add($vmid, $deviceid, $device); + $devicefull .= ",iothread=iothread-$deviceid"; + } + + if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) { + $devicefull .= ",num_queues=$device->{queues}"; + } qemu_deviceadd($vmid, $devicefull); qemu_deviceaddverify($vmid, $deviceid); @@ -3280,7 +3330,7 @@ sub vm_deviceplug { qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device); qemu_driveadd($storecfg, $vmid, $device); - + my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device); eval { qemu_deviceadd($vmid, $devicefull); }; if (my $err = $@) { @@ -3292,7 +3342,12 @@ sub vm_deviceplug { } elsif ($deviceid =~ m/^(net)(\d+)$/) { return undef if !qemu_netdevadd($vmid, $conf, $device, $deviceid); - my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid); + + my $machine_type = PVE::QemuServer::qemu_machine_pxe($vmid, $conf); + my $use_old_bios_files = undef; + ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type); + + my $netdevicefull = print_netdevice_full($vmid, $conf, $device, $deviceid, undef, $use_old_bios_files); qemu_deviceadd($vmid, $netdevicefull); eval { qemu_deviceaddverify($vmid, $deviceid); }; if (my $err = $@) { @@ -3306,12 +3361,12 @@ sub vm_deviceplug { my $bridgeid = $2; my $pciaddr = print_pci_addr($deviceid); my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr"; - + qemu_deviceadd($vmid, $devicefull); qemu_deviceaddverify($vmid, $deviceid); } else { - die "can't hotplug device '$deviceid'\n"; + die "can't hotplug device '$deviceid'\n"; } return 1; @@ -3335,16 +3390,24 @@ sub vm_deviceunplug { qemu_devicedel($vmid, $deviceid); qemu_devicedelverify($vmid, $deviceid); qemu_drivedel($vmid, $deviceid); - - } elsif ($deviceid =~ m/^(lsi)(\d+)$/) { - + qemu_iothread_del($conf, $vmid, $deviceid); + + } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) { + qemu_devicedel($vmid, $deviceid); - + qemu_devicedelverify($vmid, $deviceid); + qemu_iothread_del($conf, $vmid, $deviceid); + } elsif ($deviceid =~ m/^(scsi)(\d+)$/) { + #qemu 2.3 segfault on drive_del with virtioscsi + iothread + my $device = parse_drive($deviceid, $conf->{$deviceid}); + die "virtioscsi with iothread is not hot-unplugglable currently" if $device->{iothread}; + qemu_devicedel($vmid, $deviceid); qemu_drivedel($vmid, $deviceid); - + qemu_deletescsihw($conf, $vmid, $deviceid); + } elsif ($deviceid =~ m/^(net)(\d+)$/) { qemu_devicedel($vmid, $deviceid); @@ -3373,6 +3436,25 @@ sub qemu_devicedel { my $ret = vm_mon_cmd($vmid, "device_del", id => $deviceid); } +sub qemu_iothread_add { + my($vmid, $deviceid, $device) = @_; + + if ($device->{iothread}) { + my $iothreads = vm_iothreads_list($vmid); + qemu_objectadd($vmid, "iothread-$deviceid", "iothread") if !$iothreads->{"iothread-$deviceid"}; + } +} + +sub qemu_iothread_del { + my($conf, $vmid, $deviceid) = @_; + + my $device = parse_drive($deviceid, $conf->{$deviceid}); + if ($device->{iothread}) { + my $iothreads = vm_iothreads_list($vmid); + qemu_objectdel($vmid, "iothread-$deviceid") if $iothreads->{"iothread-$deviceid"}; + } +} + sub qemu_objectadd { my($vmid, $objectid, $qomtype) = @_; @@ -3393,6 +3475,7 @@ sub qemu_driveadd { my ($storecfg, $vmid, $device) = @_; my $drive = print_drive_full($storecfg, $vmid, $device); + $drive =~ s/\\/\\\\/g; my $ret = vm_human_monitor_command($vmid, "drive_add auto \"$drive\""); # If the command succeeds qemu prints: "OK" @@ -3406,12 +3489,12 @@ sub qemu_drivedel { my $ret = vm_human_monitor_command($vmid, "drive_del drive-$deviceid"); $ret =~ s/^\s+//; - + return 1 if $ret eq ""; - + # NB: device not found errors mean the drive was auto-deleted and we ignore the error - return 1 if $ret =~ m/Device \'.*?\' not found/s; - + return 1 if $ret =~ m/Device \'.*?\' not found/s; + die "deleting drive $deviceid failed : $ret\n"; } @@ -3431,7 +3514,7 @@ sub qemu_deviceaddverify { sub qemu_devicedelverify { my ($vmid, $deviceid) = @_; - # need to verify that the device is correctly removed as device_del + # need to verify that the device is correctly removed as device_del # is async and empty return is not reliable for (my $i = 0; $i <= 5; $i++) { @@ -3446,18 +3529,47 @@ sub qemu_devicedelverify { sub qemu_findorcreatescsihw { my ($storecfg, $conf, $vmid, $device) = @_; - my $maxdev = ($conf->{scsihw} && ($conf->{scsihw} !~ m/^lsi/)) ? 256 : 7; - my $controller = int($device->{index} / $maxdev); - my $scsihwid="scsihw$controller"; + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device); + + my $scsihwid="$controller_prefix$controller"; my $devices_list = vm_devices_list($vmid); if(!defined($devices_list->{$scsihwid})) { - vm_deviceplug($storecfg, $conf, $vmid, $scsihwid); + vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device); } return 1; } +sub qemu_deletescsihw { + my ($conf, $vmid, $opt) = @_; + + my $device = parse_drive($opt, $conf->{$opt}); + + if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) { + vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}"); + return 1; + } + + my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device); + + my $devices_list = vm_devices_list($vmid); + foreach my $opt (keys %{$devices_list}) { + if (PVE::QemuServer::valid_drivename($opt)) { + my $drive = PVE::QemuServer::parse_drive($opt, $conf->{$opt}); + if($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) { + return 1; + } + } + } + + my $scsihwid="scsihw$controller"; + + vm_deviceunplug($vmid, $conf, $scsihwid); + + return 1; +} + sub qemu_add_pci_bridge { my ($storecfg, $conf, $vmid, $device) = @_; @@ -3485,14 +3597,14 @@ sub qemu_add_pci_bridge { sub qemu_set_link_status { my ($vmid, $device, $up) = @_; - vm_mon_cmd($vmid, "set_link", name => $device, + vm_mon_cmd($vmid, "set_link", name => $device, up => $up ? JSON::true : JSON::false); } sub qemu_netdevadd { my ($vmid, $conf, $device, $deviceid) = @_; - my $netdev = print_netdev_full($vmid, $conf, $device, $deviceid); + my $netdev = print_netdev_full($vmid, $conf, $device, $deviceid, 1); my %options = split(/[=,]/, $netdev); vm_mon_cmd($vmid, "netdev_add", %options); @@ -3536,42 +3648,86 @@ sub qemu_memory_hotplug { my ($vmid, $conf, $defaults, $opt, $value) = @_; return $value if !check_running($vmid); - + my $memory = $conf->{memory} || $defaults->{memory}; - $value = $defaults->{memory} if !$value; + $value = $defaults->{memory} if !$value; return $value if $value == $memory; my $static_memory = $STATICMEM; my $dimm_memory = $memory - $static_memory; die "memory can't be lower than $static_memory MB" if $value < $static_memory; - die "memory unplug is not yet available" if $value < $memory; die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM; my $sockets = 1; $sockets = $conf->{sockets} if $conf->{sockets}; - foreach_dimm($conf, $vmid, $value, $sockets, sub { - my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; + if($value > $memory) { - return if $current_size <= $conf->{memory}; + foreach_dimm($conf, $vmid, $value, $sockets, sub { + my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; - eval { vm_mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-ram", id => "mem-$name", props => { size => int($dimm_size*1024*1024) } ) }; - if (my $err = $@) { - eval { qemu_objectdel($vmid, "mem-$name"); }; - die $err; - } + return if $current_size <= $conf->{memory}; - eval { vm_mon_cmd($vmid, "device_add", driver => "pc-dimm", id => "$name", memdev => "mem-$name", node => $numanode) }; - if (my $err = $@) { - eval { qemu_objectdel($vmid, "mem-$name"); }; - die $err; - } - #update conf after each succesful module hotplug - $conf->{memory} = $current_size; - update_config_nolock($vmid, $conf, 1); - }); + eval { vm_mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-ram", id => "mem-$name", props => { size => int($dimm_size*1024*1024) } ) }; + if (my $err = $@) { + eval { qemu_objectdel($vmid, "mem-$name"); }; + die $err; + } + + eval { vm_mon_cmd($vmid, "device_add", driver => "pc-dimm", id => "$name", memdev => "mem-$name", node => $numanode) }; + if (my $err = $@) { + eval { qemu_objectdel($vmid, "mem-$name"); }; + die $err; + } + #update conf after each succesful module hotplug + $conf->{memory} = $current_size; + update_config_nolock($vmid, $conf, 1); + }); + + } else { + + foreach_reverse_dimm($conf, $vmid, $value, $sockets, sub { + my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_; + + return if $current_size >= $conf->{memory}; + print "try to unplug memory dimm $name\n"; + + my $retry = 0; + while (1) { + eval { qemu_devicedel($vmid, $name) }; + sleep 3; + my $dimm_list = qemu_dimm_list($vmid); + last if !$dimm_list->{$name}; + raise_param_exc({ $name => "error unplug memory module" }) if $retry > 5; + $retry++; + } + + #update conf after each succesful module unplug + $conf->{memory} = $current_size; + + eval { qemu_objectdel($vmid, "mem-$name"); }; + update_config_nolock($vmid, $conf, 1); + }); + } +} + +sub qemu_dimm_list { + my ($vmid) = @_; + + my $dimmarray = vm_mon_cmd_nocheck($vmid, "query-memory-devices"); + my $dimms = {}; + + foreach my $dimm (@$dimmarray) { + + $dimms->{$dimm->{data}->{id}}->{id} = $dimm->{data}->{id}; + $dimms->{$dimm->{data}->{id}}->{node} = $dimm->{data}->{node}; + $dimms->{$dimm->{data}->{id}}->{addr} = $dimm->{data}->{addr}; + $dimms->{$dimm->{data}->{id}}->{size} = $dimm->{data}->{size}; + $dimms->{$dimm->{data}->{id}}->{slot} = $dimm->{data}->{slot}; + } + return $dimms; } sub qemu_block_set_io_throttle { @@ -3709,12 +3865,11 @@ sub qemu_volume_snapshot { my $running = check_running($vmid); - return if !PVE::Storage::volume_snapshot($storecfg, $volid, $snap, $running); - - return if !$running; - - vm_mon_cmd($vmid, "snapshot-drive", device => $deviceid, name => $snap); - + if ($running && do_snapshots_with_qemu($storecfg, $volid)){ + vm_mon_cmd($vmid, "snapshot-drive", device => $deviceid, name => $snap); + } else { + PVE::Storage::volume_snapshot($storecfg, $volid, $snap); + } } sub qemu_volume_snapshot_delete { @@ -3736,9 +3891,10 @@ sub set_migration_caps { my $enabled_cap = { "auto-converge" => 1, - "xbzrle" => 0, + "xbzrle" => 1, "x-rdma-pin-all" => 0, "zero-blocks" => 0, + "compress" => 0 }; my $supported_capabilities = vm_mon_cmd_nocheck($vmid, "query-migrate-capabilities"); @@ -3754,10 +3910,12 @@ sub set_migration_caps { } my $fast_plug_option = { + 'lock' => 1, 'name' => 1, - 'onboot' => 1, + 'onboot' => 1, 'shares' => 1, 'startup' => 1, + 'description' => 1, }; # hotplug changes in [PENDING] @@ -3794,8 +3952,8 @@ sub vmconfig_hotplug_pending { my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1'); - my @delete = PVE::Tools::split_list($conf->{pending}->{delete}); - foreach my $opt (@delete) { + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + while (my ($opt, $force) = each %$pending_delete_hash) { next if $selection && !$selection->{$opt}; eval { if ($opt eq 'hotplug') { @@ -3821,10 +3979,14 @@ sub vmconfig_hotplug_pending { } elsif (valid_drivename($opt)) { die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/; vm_deviceunplug($vmid, $conf, $opt); - vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt})); + vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); } elsif ($opt =~ m/^memory$/) { die "skip\n" if !$hotplug_features->{memory}; qemu_memory_hotplug($vmid, $conf, $defaults, $opt); + } elsif ($opt eq 'cpuunits') { + cgroups_write("cpu", $vmid, "cpu.shares", $defaults->{cpuunits}); + } elsif ($opt eq 'cpulimit') { + cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", -1); } else { die "skip\n"; } @@ -3859,17 +4021,17 @@ sub vmconfig_hotplug_pending { } elsif ($opt eq 'balloon') { # enable/disable balloning device is not hotpluggable my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon}); - my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon}); + my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon}); die "skip\n" if $old_balloon_enabled != $new_balloon_enabled; # allow manual ballooning if shares is set to zero - if (!(defined($conf->{shares}) && ($conf->{shares} == 0))) { + if ((defined($conf->{shares}) && ($conf->{shares} == 0))) { my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory}; vm_mon_cmd($vmid, "balloon", value => $balloon*1024*1024); } - } elsif ($opt =~ m/^net(\d+)$/) { + } elsif ($opt =~ m/^net(\d+)$/) { # some changes can be done without hotplug - vmconfig_update_net($storecfg, $conf, $hotplug_features->{network}, + vmconfig_update_net($storecfg, $conf, $hotplug_features->{network}, $vmid, $opt, $value); } elsif (valid_drivename($opt)) { # some changes can be done without hotplug @@ -3878,6 +4040,11 @@ sub vmconfig_hotplug_pending { } elsif ($opt =~ m/^memory$/) { #dimms die "skip\n" if !$hotplug_features->{memory}; $value = qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value); + } elsif ($opt eq 'cpuunits') { + cgroups_write("cpu", $vmid, "cpu.shares", $conf->{pending}->{$opt}); + } elsif ($opt eq 'cpulimit') { + my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000); + cgroups_write("cpu", $vmid, "cpu.cfs_quota_us", $cpulimit); } else { die "skip\n"; # skip non-hot-pluggable options } @@ -3894,20 +4061,61 @@ sub vmconfig_hotplug_pending { } } +sub try_deallocate_drive { + my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_; + + if (($force || $key =~ /^unused/) && !drive_is_cdrom($drive, 1)) { + my $volid = $drive->{file}; + if (vm_is_volid_owner($storecfg, $vmid, $volid)) { + my $sid = PVE::Storage::parse_volume_id($volid); + $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']); + + # check if the disk is really unused + my $used_paths = PVE::QemuServer::get_used_paths($vmid, $storecfg, $conf, 1, $key); + my $path = PVE::Storage::path($storecfg, $volid); + die "unable to delete '$volid' - volume is still in use (snapshot?)\n" + if $used_paths->{$path}; + PVE::Storage::vdisk_free($storecfg, $volid); + return 1; + } else { + # If vm is not owner of this disk remove from config + return 1; + } + } + + return undef; +} + +sub vmconfig_delete_or_detach_drive { + my ($vmid, $storecfg, $conf, $opt, $force) = @_; + + my $drive = parse_drive($opt, $conf->{$opt}); + + my $rpcenv = PVE::RPCEnvironment::get(); + my $authuser = $rpcenv->get_user(); + + if ($force) { + $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']); + try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force); + } else { + vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive); + } +} + sub vmconfig_apply_pending { my ($vmid, $conf, $storecfg) = @_; # cold plug - my @delete = PVE::Tools::split_list($conf->{pending}->{delete}); - foreach my $opt (@delete) { # delete + my $pending_delete_hash = split_flagged_list($conf->{pending}->{delete}); + while (my ($opt, $force) = each %$pending_delete_hash) { die "internal error" if $opt =~ m/^unused/; $conf = load_config($vmid); # update/reload if (!defined($conf->{$opt})) { vmconfig_undelete_pending_option($conf, $opt); update_config_nolock($vmid, $conf, 1); } elsif (valid_drivename($opt)) { - vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt})); + vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force); vmconfig_undelete_pending_option($conf, $opt); delete $conf->{$opt}; update_config_nolock($vmid, $conf, 1); @@ -3978,7 +4186,7 @@ sub vmconfig_update_net { die "internal error" if $opt !~ m/net(\d+)/; my $iface = "tap${vmid}i$1"; - + if (&$safe_num_ne($oldnet->{rate}, $newnet->{rate})) { PVE::Network::tap_rate_limit($iface, $newnet->{rate}); } @@ -3997,7 +4205,7 @@ sub vmconfig_update_net { return 1; } } - + if ($hotplug) { vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet); } else { @@ -4022,19 +4230,21 @@ sub vmconfig_update_disk { if (!drive_is_cdrom($old_drive)) { - if ($drive->{file} ne $old_drive->{file}) { + if ($drive->{file} ne $old_drive->{file}) { die "skip\n" if !$hotplug; # unplug and register as unused vm_deviceunplug($vmid, $conf, $opt); vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive) - + } else { # update existing disk # skip non hotpluggable value - if (&$safe_num_ne($drive->{discard}, $old_drive->{discard}) || + if (&$safe_num_ne($drive->{discard}, $old_drive->{discard}) || + &$safe_string_ne($drive->{iothread}, $old_drive->{iothread}) || + &$safe_string_ne($drive->{queues}, $old_drive->{queues}) || &$safe_string_ne($drive->{cache}, $old_drive->{cache})) { die "skip\n"; } @@ -4052,7 +4262,7 @@ sub vmconfig_update_disk { &$safe_num_ne($drive->{iops_max}, $old_drive->{iops_max}) || &$safe_num_ne($drive->{iops_rd_max}, $old_drive->{iops_rd_max}) || &$safe_num_ne($drive->{iops_wr_max}, $old_drive->{iops_wr_max})) { - + qemu_block_set_io_throttle($vmid,"drive-$opt", ($drive->{mbps} || 0)*1024*1024, ($drive->{mbps_rd} || 0)*1024*1024, @@ -4068,12 +4278,12 @@ sub vmconfig_update_disk { $drive->{iops_wr_max} || 0); } - + return 1; } } else { # cdrom - + if ($drive->{file} eq 'none') { vm_mon_cmd($vmid, "eject",force => JSON::true,device => "drive-$opt"); } else { @@ -4081,17 +4291,20 @@ sub vmconfig_update_disk { vm_mon_cmd($vmid, "eject", force => JSON::true,device => "drive-$opt"); # force eject if locked vm_mon_cmd($vmid, "change", device => "drive-$opt",target => "$path") if $path; } + + return 1; } } } - die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/; + die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/; # hotplug new disks vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive); } sub vm_start { - my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, $forcemachine, $spice_ticket) = @_; + my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, + $forcemachine, $spice_ticket) = @_; lock_config($vmid, sub { my $conf = load_config($vmid, $migratedfrom); @@ -4120,11 +4333,13 @@ sub vm_start { if ($statefile eq 'tcp') { my $localip = "localhost"; my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg'); + my $nodename = PVE::INotify::nodename(); if ($datacenterconf->{migration_unsecure}) { - my $nodename = PVE::INotify::nodename(); $localip = PVE::Cluster::remote_node_ip($nodename, 1); + $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip); } - $migrate_port = PVE::Tools::next_migrate_port(); + my $pfamily = PVE::Tools::get_host_address_family($nodename); + $migrate_port = PVE::Tools::next_migrate_port($pfamily); $migrate_uri = "tcp:${localip}:${migrate_port}"; push @$cmd, '-incoming', $migrate_uri; push @$cmd, '-S'; @@ -4146,13 +4361,7 @@ sub vm_start { my $info = pci_device_info("0000:$pciid"); die "IOMMU not present\n" if !check_iommu_support(); die "no pci device info for device '$pciid'\n" if !$info; - - if ($d->{driver} && $d->{driver} eq "vfio") { - die "can't unbind/bind pci group to vfio '$pciid'\n" if !pci_dev_group_bind_to_vfio($pciid); - } else { - die "can't unbind/bind to stub pci device '$pciid'\n" if !pci_dev_bind_to_stub($info); - } - + die "can't unbind/bind pci group to vfio '$pciid'\n" if !pci_dev_group_bind_to_vfio($pciid); die "can't reset pci device '$pciid'\n" if $info->{has_fl_reset} and !pci_dev_reset($info); } } @@ -4191,10 +4400,6 @@ sub vm_start { if (!$statefile && (!defined($conf->{balloon}) || $conf->{balloon})) { vm_mon_cmd_nocheck($vmid, "balloon", value => $conf->{balloon}*1024*1024) if $conf->{balloon}; - vm_mon_cmd_nocheck($vmid, 'qom-set', - path => "machine/peripheral/balloon0", - property => "guest-stats-polling-interval", - value => 2); } foreach my $opt (keys %$conf) { @@ -4203,6 +4408,12 @@ sub vm_start { qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down}; } } + + vm_mon_cmd_nocheck($vmid, 'qom-set', + path => "machine/peripheral/balloon0", + property => "guest-stats-polling-interval", + value => 2) if (!defined($conf->{balloon}) || $conf->{balloon}); + }); } @@ -4314,17 +4525,16 @@ sub vm_stop_cleanup { my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_; eval { - fairsched_rmnod($vmid); # try to destroy group if (!$keepActive) { my $vollist = get_vm_volumes($conf); PVE::Storage::deactivate_volumes($storecfg, $vollist); } - + foreach my $ext (qw(mon qmp pid vnc qga)) { unlink "/var/run/qemu-server/${vmid}.$ext"; } - + vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes; }; warn $@ if $@; # avoid errors - just warn @@ -4356,7 +4566,7 @@ sub vm_stop { $conf = load_config($vmid); check_lock($conf) if !$skiplock; if (!defined($timeout) && $shutdown && $conf->{startup}) { - my $opts = parse_startup($conf->{startup}); + my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup}); $timeout = $opts->{down} if $opts->{down}; } } @@ -4436,15 +4646,21 @@ sub vm_suspend { } sub vm_resume { - my ($vmid, $skiplock) = @_; + my ($vmid, $skiplock, $nocheck) = @_; lock_config($vmid, sub { - my $conf = load_config($vmid); + if (!$nocheck) { - check_lock($conf) if !($skiplock || ($conf->{lock} && $conf->{lock} eq 'backup')); + my $conf = load_config($vmid); - vm_mon_cmd($vmid, "cont"); + check_lock($conf) if !($skiplock || ($conf->{lock} && $conf->{lock} eq 'backup')); + + vm_mon_cmd($vmid, "cont"); + + } else { + vm_mon_cmd_nocheck($vmid, "cont"); + } }); } @@ -4470,7 +4686,6 @@ sub vm_destroy { check_lock($conf) if !$skiplock; if (!check_running($vmid)) { - fairsched_rmnod($vmid); # try to destroy group destroy_vm($storecfg, $vmid); } else { die "VM $vmid is running - destroy failed\n"; @@ -4535,30 +4750,6 @@ sub pci_dev_reset { return file_write($fn, "1"); } -sub pci_dev_bind_to_stub { - my ($dev) = @_; - - my $name = $dev->{name}; - - my $testdir = "$pcisysfs/drivers/pci-stub/$name"; - return 1 if -d $testdir; - - my $data = "$dev->{vendor} $dev->{product}"; - return undef if !file_write("$pcisysfs/drivers/pci-stub/new_id", $data); - - my $fn = "$pcisysfs/devices/$name/driver/unbind"; - if (!file_write($fn, $name)) { - return undef if -f $fn; - } - - $fn = "$pcisysfs/drivers/pci-stub/bind"; - if (! -d $testdir) { - return undef if !file_write($fn, $name); - } - - return -d $testdir; -} - sub pci_dev_bind_to_vfio { my ($dev) = @_; @@ -4629,6 +4820,7 @@ sub print_pci_addr { balloon0 => { bus => 0, addr => 3 }, watchdog => { bus => 0, addr => 4 }, scsihw0 => { bus => 0, addr => 5 }, + 'pci.3' => { bus => 0, addr => 5 }, #can also be used for virtio-scsi-single bridge scsihw1 => { bus => 0, addr => 6 }, ahci0 => { bus => 0, addr => 7 }, qga0 => { bus => 0, addr => 8 }, @@ -4691,6 +4883,38 @@ sub print_pci_addr { 'virtio13' => { bus => 2, addr => 8 }, 'virtio14' => { bus => 2, addr => 9 }, 'virtio15' => { bus => 2, addr => 10 }, + 'virtioscsi0' => { bus => 3, addr => 1 }, + 'virtioscsi1' => { bus => 3, addr => 2 }, + 'virtioscsi2' => { bus => 3, addr => 3 }, + 'virtioscsi3' => { bus => 3, addr => 4 }, + 'virtioscsi4' => { bus => 3, addr => 5 }, + 'virtioscsi5' => { bus => 3, addr => 6 }, + 'virtioscsi6' => { bus => 3, addr => 7 }, + 'virtioscsi7' => { bus => 3, addr => 8 }, + 'virtioscsi8' => { bus => 3, addr => 9 }, + 'virtioscsi9' => { bus => 3, addr => 10 }, + 'virtioscsi10' => { bus => 3, addr => 11 }, + 'virtioscsi11' => { bus => 3, addr => 12 }, + 'virtioscsi12' => { bus => 3, addr => 13 }, + 'virtioscsi13' => { bus => 3, addr => 14 }, + 'virtioscsi14' => { bus => 3, addr => 15 }, + 'virtioscsi15' => { bus => 3, addr => 16 }, + 'virtioscsi16' => { bus => 3, addr => 17 }, + 'virtioscsi17' => { bus => 3, addr => 18 }, + 'virtioscsi18' => { bus => 3, addr => 19 }, + 'virtioscsi19' => { bus => 3, addr => 20 }, + 'virtioscsi20' => { bus => 3, addr => 21 }, + 'virtioscsi21' => { bus => 3, addr => 22 }, + 'virtioscsi22' => { bus => 3, addr => 23 }, + 'virtioscsi23' => { bus => 3, addr => 24 }, + 'virtioscsi24' => { bus => 3, addr => 25 }, + 'virtioscsi25' => { bus => 3, addr => 26 }, + 'virtioscsi26' => { bus => 3, addr => 27 }, + 'virtioscsi27' => { bus => 3, addr => 28 }, + 'virtioscsi28' => { bus => 3, addr => 29 }, + 'virtioscsi29' => { bus => 3, addr => 30 }, + 'virtioscsi30' => { bus => 3, addr => 31 }, + }; if (defined($devices->{$id}->{bus}) && defined($devices->{$id}->{addr})) { @@ -5151,6 +5375,8 @@ sub restore_vma_archive { $d->{volid} = $volid; my $path = PVE::Storage::path($cfg, $volid); + PVE::Storage::activate_volumes($cfg,[$volid]); + my $write_zeros = 1; # fixme: what other storages types initialize volumes with zero? if ($scfg->{type} eq 'dir' || $scfg->{type} eq 'nfs' || $scfg->{type} eq 'glusterfs' || @@ -5216,13 +5442,21 @@ sub restore_vma_archive { alarm($oldtimeout) if $oldtimeout; + my $vollist = []; + foreach my $devname (keys %$devinfo) { + my $volid = $devinfo->{$devname}->{volid}; + push @$vollist, $volid if $volid; + } + + my $cfg = cfs_read_file('storage.cfg'); + PVE::Storage::deactivate_volumes($cfg, $vollist); + unlink $mapfifo; if ($err) { rmtree $tmpdir; unlink $tmpfn; - my $cfg = cfs_read_file('storage.cfg'); foreach my $devname (keys %$devinfo) { my $volid = $devinfo->{$devname}->{volid}; next if !$volid; @@ -5664,6 +5898,23 @@ my $savevm_wait = sub { } }; +sub do_snapshots_with_qemu { + my ($storecfg, $volid) = @_; + + my $storage_name = PVE::Storage::parse_volume_id($volid); + + if ($qemu_snap_storage->{$storecfg->{ids}->{$storage_name}->{type}} + && !$storecfg->{ids}->{$storage_name}->{krbd}){ + return 1; + } + + if ($volid =~ m/\.(qcow2|qed)$/){ + return 1; + } + + return undef; +} + sub snapshot_create { my ($vmid, $snapname, $save_vmstate, $comment) = @_; @@ -5908,6 +6159,9 @@ sub qemu_img_convert { my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1); if ($src_storeid && $dst_storeid) { + + PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname); + my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid); my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid); @@ -5943,10 +6197,8 @@ sub qemu_img_convert { sub qemu_img_format { my ($scfg, $volname) = @_; - if ($scfg->{path} && $volname =~ m/\.(raw|qcow2|qed|vmdk)$/) { + if ($scfg->{path} && $volname =~ m/\.(raw|cow|qcow|qcow2|qed|vmdk|cloop)$/) { return $1; - } elsif ($scfg->{type} eq 'iscsi') { - return "host_device"; } else { return "raw"; } @@ -5955,32 +6207,22 @@ sub qemu_img_format { sub qemu_drive_mirror { my ($vmid, $drive, $dst_volid, $vmiddst) = @_; - my $count = 0; - my $old_len = 0; - my $frozen = undef; - my $maxwait = 120; - my $storecfg = PVE::Storage::config(); my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid); my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid); - my $format; - if ($dst_volname =~ m/\.(raw|qcow2)$/){ - $format = $1; - } + my $format = qemu_img_format($dst_scfg, $dst_volname); my $dst_path = PVE::Storage::path($storecfg, $dst_volid); my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $dst_path }; $opts->{format} = $format if $format; - #fixme : sometime drive-mirror timeout, but works fine after. - # (I have see the problem with big volume > 200GB), so we need to eval - eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; - # ignore errors here + print "drive mirror is starting (scanning bitmap) : this step can take some minutes/hours, depend of disk size and storage speed\n"; eval { + vm_mon_cmd($vmid, "drive-mirror", %$opts); while (1) { my $stats = vm_mon_cmd($vmid, "query-block-jobs"); my $stat = @$stats[0]; @@ -5988,39 +6230,29 @@ sub qemu_drive_mirror { die "error job is not mirroring" if $stat->{type} ne "mirror"; my $busy = $stat->{busy}; + my $ready = $stat->{ready}; if (my $total = $stat->{len}) { my $transferred = $stat->{offset} || 0; my $remaining = $total - $transferred; my $percent = sprintf "%.2f", ($transferred * 100 / $total); - print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy\n"; + print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n"; } - if ($stat->{len} == $stat->{offset}) { - if ($busy eq 'false') { - last if $vmiddst != $vmid; + if ($stat->{ready} eq 'true') { - # try to switch the disk if source and destination are on the same guest - eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") }; - last if !$@; - die $@ if $@ !~ m/cannot be completed/; - } + last if $vmiddst != $vmid; - if ($count > $maxwait) { - # if too much writes to disk occurs at the end of migration - #the disk needs to be freezed to be able to complete the migration - vm_suspend($vmid,1); - $frozen = 1; - } - $count ++ + # try to switch the disk if source and destination are on the same guest + eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") }; + last if !$@; + die $@ if $@ !~ m/cannot be completed/; } - $old_len = $stat->{offset}; sleep 1; } - vm_resume($vmid, 1) if $frozen; }; my $err = $@; @@ -6062,7 +6294,8 @@ sub clone_disk { my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid); if (!$format) { - $format = $drive->{format} || $defFormat; + my $scfg = PVE::Storage::storage_config($storecfg, $storeid); + $format = qemu_img_format($scfg, $volname); } # test if requested format is supported - else use default @@ -6075,6 +6308,8 @@ sub clone_disk { $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $newvmid, $format, undef, ($size/1024)); push @$newvollist, $newvolid; + PVE::Storage::activate_volumes($storecfg, $newvollist); + if (!$running || $snapname) { qemu_img_convert($drive->{file}, $newvolid, $size, $snapname); } else { @@ -6109,6 +6344,65 @@ sub get_current_qemu_machine { return $current || $default || 'pc'; } +sub qemu_machine_feature_enabled { + my ($machine, $kvmver, $version_major, $version_minor) = @_; + + my $current_major; + my $current_minor; + + if ($machine && $machine =~ m/^(pc(-i440fx|-q35)?-(\d+)\.(\d+))/) { + + $current_major = $3; + $current_minor = $4; + + } elsif ($kvmver =~ m/^(\d+)\.(\d+)/) { + + $current_major = $1; + $current_minor = $2; + } + + return 1 if $current_major >= $version_major && $current_minor >= $version_minor; + + +} + +sub qemu_machine_pxe { + my ($vmid, $conf, $machine) = @_; + + $machine = PVE::QemuServer::get_current_qemu_machine($vmid) if !$machine; + + foreach my $opt (keys %$conf) { + next if $opt !~ m/^net(\d+)$/; + my $net = PVE::QemuServer::parse_net($conf->{$opt}); + next if !$net; + my $romfile = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, 'qom-get', path => $opt, property => 'romfile'); + return $machine.".pxe" if $romfile =~ m/pxe/; + last; + } + +} + +sub qemu_use_old_bios_files { + my ($machine_type) = @_; + + return if !$machine_type; + + my $use_old_bios_files = undef; + + if ($machine_type =~ m/^(\S+)\.pxe$/) { + $machine_type = $1; + $use_old_bios_files = 1; + } else { + # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we + # load new efi bios files on migration. So this hack is required to allow + # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when + # updrading from proxmox-ve-3.X to proxmox-ve 4.0 + $use_old_bios_files = !qemu_machine_feature_enabled ($machine_type, undef, 2, 4); + } + + return ($use_old_bios_files, $machine_type); +} + sub lspci { my $devices = {}; @@ -6122,4 +6416,109 @@ sub lspci { return $devices; } +sub vm_iothreads_list { + my ($vmid) = @_; + + my $res = vm_mon_cmd($vmid, 'query-iothreads'); + + my $iothreads = {}; + foreach my $iothread (@$res) { + $iothreads->{ $iothread->{id} } = $iothread->{"thread-id"}; + } + + return $iothreads; +} + +sub scsihw_infos { + my ($conf, $drive) = @_; + + my $maxdev = 0; + + if ($conf->{scsihw} && ($conf->{scsihw} =~ m/^lsi/)) { + $maxdev = 7; + } elsif ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) { + $maxdev = 1; + } else { + $maxdev = 256; + } + + my $controller = int($drive->{index} / $maxdev); + my $controller_prefix = ($conf->{scsihw} && $conf->{scsihw} eq 'virtio-scsi-single') ? "virtioscsi" : "scsihw"; + + return ($maxdev, $controller, $controller_prefix); +} + +# bash completion helper + +sub complete_backup_archives { + my ($cmdname, $pname, $cvalue) = @_; + + my $cfg = PVE::Storage::config(); + + my $storeid; + + if ($cvalue =~ m/^([^:]+):/) { + $storeid = $1; + } + + my $data = PVE::Storage::template_list($cfg, $storeid, 'backup'); + + my $res = []; + foreach my $id (keys %$data) { + foreach my $item (@{$data->{$id}}) { + next if $item->{format} !~ m/^vma\.(gz|lzo)$/; + push @$res, $item->{volid} if defined($item->{volid}); + } + } + + return $res; +} + +my $complete_vmid_full = sub { + my ($running) = @_; + + my $idlist = vmstatus(); + + my $res = []; + + foreach my $id (keys %$idlist) { + my $d = $idlist->{$id}; + if (defined($running)) { + next if $d->{template}; + next if $running && $d->{status} ne 'running'; + next if !$running && $d->{status} eq 'running'; + } + push @$res, $id; + + } + return $res; +}; + +sub complete_vmid { + return &$complete_vmid_full(); +} + +sub complete_vmid_stopped { + return &$complete_vmid_full(0); +} + +sub complete_vmid_running { + return &$complete_vmid_full(1); +} + +sub complete_storage { + + my $cfg = PVE::Storage::config(); + my $ids = $cfg->{ids}; + + my $res = []; + foreach my $sid (keys %$ids) { + next if !PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1); + next if !$ids->{$sid}->{content}->{images}; + push @$res, $sid; + } + + return $res; +} + 1;