memory hotplug: rework max memory handling, make phys-bits dependent

[qemu-server.git] / PVE / QemuServer / Memory.pm
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm

index d500b3b590902bdac68610a41550bde048215411..013917e79ca26c327971bc0bc38850e30358664f 100644 (file)
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -10,9 +10,57 @@ use PVE::QemuServer;
  use PVE::QemuServer::Monitor qw(mon_cmd);
  
  my $MAX_NUMA = 8;
-my $MAX_MEM = 4194304;
  my $STATICMEM = 1024;
  
+my $_host_bits; # cache for the value parsed from /proc/cpuinfo, filled on the first successful lookup
+my sub get_host_phys_address_bits { # physical address width of the host CPU in bits, empty return if unknown
+    return $_host_bits if defined($_host_bits);
+
+    my $fh = IO::File->new ('/proc/cpuinfo', "r") or return; # cannot open cpuinfo => empty return, nothing cached
+    while (defined(my $line = <$fh>)) {
+       # hopefully we never need to care about mixed (big.LITTLE) archs
+       if ($line =~ m/^address sizes\s*:\s*(\d+)\s*bits physical/i) {
+           $_host_bits = int($1);
+           $fh->close();
+           return $_host_bits; # first matching line wins, later lines are never read
+       }
+    }
+    $fh->close();
+    return; # undef, cannot really do anything..
+}
+
+my sub get_max_mem { # upper bound for VM memory in MB, derived from the configured or host physical address bits
+    my ($conf) = @_;
+
+    my $cpu = {}; # stays empty when the config has no 'cpu' property
+    if (my $cpu_prop_str = $conf->{cpu}) {
+       $cpu = PVE::JSONSchema::parse_property_string('pve-vm-cpu-conf', $cpu_prop_str)
+           or die "Cannot parse cpu description: $cpu_prop_str\n";
+    }
+    my $bits;
+    if (my $phys_bits = $cpu->{'phys-bits'}) { # an explicit 'phys-bits' setting takes precedence
+       if ($phys_bits eq 'host') {
+           $bits = get_host_phys_address_bits(); # may be undef, handled by the fallback below
+       } elsif ($phys_bits =~ /^(\d+)$/) {
+           $bits = int($phys_bits);
+       }
+    }
+
+    if (!defined($bits)) { # nothing usable configured, or the host lookup failed
+       my $host_bits = get_host_phys_address_bits() // 36; # fixme: what fallback?
+       if ($cpu->{cputype} && $cpu->{cputype} =~ /^(host|max)$/) { # 'host' and 'max' CPU types use the full host width
+           $bits = $host_bits;
+       } else {
+           $bits = $host_bits > 40 ? 40 : $host_bits; # take the smaller one
+       }
+    }
+
+    # remove 20 bits to get MB and one more bit to halve that, as QEMU needs some overhead
+    my $bits_to_max_mem = int(1 << ($bits - 21));
+
+    return $bits_to_max_mem > 4*1024*1024 ? 4*1024*1024 : $bits_to_max_mem; # never more than 4*1024*1024 MB
+}
+
  sub get_numa_node_list {
      my ($conf) = @_;
      my @numa_map;
@@ -93,11 +141,11 @@ sub foreach_reverse_dimm {
  
      for (my $j = 0; $j < 8; $j++) {
         for (my $i = 0; $i < 32; $i++) {
-           my $name = "dimm${dimm_id}";
-           $dimm_id--;
+           my $name = "dimm${dimm_id}";
+           $dimm_id--;
             my $numanode = $numa_map[(31-$i) % @numa_map];
-           $current_size -= $dimm_size;
-           &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory);
+           $current_size -= $dimm_size;
+           &$func($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory);
             return  $current_size if $current_size <= $memory;
         }
         $dimm_size /= 2;
@@ -120,18 +168,20 @@ sub qemu_memory_hotplug {
      $static_memory = $static_memory * $sockets if ($conf->{hugepages} && $conf->{hugepages} == 1024);
  
      die "memory can't be lower than $static_memory MB" if $value < $static_memory;
-    die "you cannot add more memory than $MAX_MEM MB!\n" if $memory > $MAX_MEM;
+    my $MAX_MEM = get_max_mem($conf);
+    die "you cannot add more memory than max mem $MAX_MEM MB!\n" if $memory > $MAX_MEM;
  
-    if($value > $memory) {
+    if ($value > $memory) {
  
-       my $numa_hostmap = get_numa_guest_to_host_map($conf) if $conf->{hugepages};
+       my $numa_hostmap;
  
-       foreach_dimm($conf, $vmid, $value, $sockets, sub {
+       foreach_dimm($conf, $vmid, $value, $sockets, sub {
             my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
  
                 return if $current_size <= $conf->{memory};
  
                 if ($conf->{hugepages}) {
+                   $numa_hostmap = get_numa_guest_to_host_map($conf) if !$numa_hostmap;
  
                     my $hugepages_size = hugepages_size($conf, $dimm_size);
                     my $path = hugepages_mount_path($hugepages_size);
@@ -142,8 +192,7 @@ sub qemu_memory_hotplug {
                         my $hugepages_host_topology = hugepages_host_topology();
                         hugepages_allocate($hugepages_topology, $hugepages_host_topology);
  
-                       eval { mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-file", id => "mem-$name", props => {
-                                            size => int($dimm_size*1024*1024), 'mem-path' => $path, share => JSON::true, prealloc => JSON::true } ); };
+                       eval { mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-file", id => "mem-$name", size => int($dimm_size*1024*1024), 'mem-path' => $path, share => JSON::true, prealloc => JSON::true ) };
                         if (my $err = $@) {
                             hugepages_reset($hugepages_host_topology);
                             die $err;
@@ -154,7 +203,7 @@ sub qemu_memory_hotplug {
                     eval { hugepages_update_locked($code); };
  
                 } else {
-                   eval { mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-ram", id => "mem-$name", props => { size => int($dimm_size*1024*1024) } ) };
+                   eval { mon_cmd($vmid, "object-add", 'qom-type' => "memory-backend-ram", id => "mem-$name", size => int($dimm_size*1024*1024) ) };
                 }
  
                 if (my $err = $@) {
@@ -174,14 +223,14 @@ sub qemu_memory_hotplug {
  
      } else {
  
-       foreach_reverse_dimm($conf, $vmid, $value, $sockets, sub {
+       foreach_reverse_dimm($conf, $vmid, $value, $sockets, sub {
             my ($conf, $vmid, $name, $dimm_size, $numanode, $current_size, $memory) = @_;
  
                 return if $current_size >= $conf->{memory};
                 print "try to unplug memory dimm $name\n";
  
                 my $retry = 0;
-               while (1) {
+               while (1) {
                     eval { PVE::QemuServer::qemu_devicedel($vmid, $name) };
                     sleep 3;
                     my $dimm_list = qemu_dimm_list($vmid);
@@ -224,7 +273,14 @@ sub config {
  
      if ($hotplug_features->{memory}) {
         die "NUMA needs to be enabled for memory hotplug\n" if !$conf->{numa};
+       my $MAX_MEM = get_max_mem($conf);
         die "Total memory is bigger than ${MAX_MEM}MB\n" if $memory > $MAX_MEM;
+
+       for (my $i = 0; $i < $MAX_NUMA; $i++) {
+           die "cannot enable memory hotplugging with custom NUMA topology\n"
+               if $conf->{"numa$i"};
+       }
+
         my $sockets = 1;
         $sockets = $conf->{sockets} if $conf->{sockets};
  
@@ -286,21 +342,18 @@ sub config {
             if $numa_totalmemory && $numa_totalmemory != $static_memory;
  
         #if no custom tology, we split memory and cores across numa nodes
-       if(!$numa_totalmemory) {
-
+       if (!$numa_totalmemory) {
             my $numa_memory = ($static_memory / $sockets);
  
             for (my $i = 0; $i < $sockets; $i++)  {
                 die "host NUMA node$i doesn't exist\n" if ! -d "/sys/devices/system/node/node$i/" && $conf->{hugepages};
  
-               my $cpustart = ($cores * $i);
-               my $cpuend = ($cpustart + $cores - 1) if $cores && $cores > 1;
-               my $cpus = $cpustart;
-               $cpus .= "-$cpuend" if $cpuend;
-
                 my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
-
                 push @$cmd, '-object', $mem_object;
+
+               my $cpus = ($cores * $i);
+               $cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
+
                 push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
             }
         }
@@ -315,11 +368,8 @@ sub config {
             push @$cmd, "-object" , $mem_object;
             push @$cmd, "-device", "pc-dimm,id=$name,memdev=mem-$name,node=$numanode";
  
-           #if dimm_memory is not aligned to dimm map
-           if($current_size > $memory) {
-                $conf->{memory} = $current_size;
-                PVE::QemuConfig->write_config($vmid, $conf);
-           }
+           die "memory size ($memory) must be aligned to $dimm_size for hotplugging\n"
+               if $current_size > $memory;
         });
      }
  }