bump version to 8.2.1

[pve-common.git] / src / PVE / CGroup.pm
diff --git a/src/PVE/CGroup.pm b/src/PVE/CGroup.pm

index 92a065464dea4077b5eb95b7d73582925fc9c762..e2839cf9850dca99f347321f5125b9d0e06d821a 100644 (file)
--- a/src/PVE/CGroup.pm
+++ b/src/PVE/CGroup.pm
@@ -86,21 +86,30 @@ sub get_cgroup_controllers() {
  my $CGROUP_MODE = undef;
  # Figure out which cgroup mode we're operating under:
  #
-# Returns 1 if cgroupv1 controllers exist (hybrid or legacy mode), and 2 in a
-# cgroupv2-only environment.
+# For this we check the file system type of `/sys/fs/cgroup` as it may well be possible that some
+# additional cgroupv1 mount points have been created by tools such as `systemd-nspawn`, or
+# manually.
+#
+# Returns 1 for what we consider the hybrid layout, 2 for what we consider the unified layout.
  #
  # NOTE: To fully support a hybrid layout it is better to use functions like
-# `cpuset_controller_path`.
+# `cpuset_controller_path` and not rely on this value for anything involving paths.
  #
  # This is a function, not a method!
  sub cgroup_mode() {
      if (!defined($CGROUP_MODE)) {
-       my ($v1, $v2) = get_cgroup_controllers();
-       if (keys %$v1) {
-           # hybrid or legacy mode
-           $CGROUP_MODE = 1;
-       } elsif ($v2) {
-           $CGROUP_MODE = 2;
+       my $mounts = PVE::ProcFSTools::parse_proc_mounts();
+       for my $entry (@$mounts) {
+           my ($what, $dir, $fstype, $opts) = @$entry;
+           if ($dir eq '/sys/fs/cgroup') {
+               if ($fstype eq 'cgroup2') {
+                   $CGROUP_MODE = 2;
+                   last;
+               } else {
+                   $CGROUP_MODE = 1;
+                   last;
+               }
+           }
         }
      }
  
@@ -399,7 +408,7 @@ sub get_pressure_stat {
  #
  # Dies on error (including a not-running or currently-shutting-down guest).
  sub change_memory_limit {
-    my ($self, $mem_bytes, $swap_bytes) = @_;
+    my ($self, $mem_bytes, $swap_bytes, $mem_high_bytes) = @_;
  
      my ($path, $ver) = $self->get_path('memory', 1);
      if (!defined($path)) {
@@ -407,8 +416,11 @@ sub change_memory_limit {
      } elsif ($ver == 2) {
         PVE::ProcFSTools::write_proc_entry("$path/memory.swap.max", $swap_bytes)
             if defined($swap_bytes);
-       PVE::ProcFSTools::write_proc_entry("$path/memory.max", $mem_bytes)
-           if defined($mem_bytes);
+       if (defined($mem_bytes)) {
+           # 'max' is the hard-limit (triggers OOM), while 'high' throttles & adds reclaim pressure
+           PVE::ProcFSTools::write_proc_entry("$path/memory.high", $mem_high_bytes // 'max');
+           PVE::ProcFSTools::write_proc_entry("$path/memory.max", $mem_bytes);
+       }
      } elsif ($ver == 1) {
         # With cgroupv1 we cannot control memory and swap limits separately.
         # This also means that since the two values aren't independent, we need to handle
@@ -477,6 +489,24 @@ sub change_cpu_quota {
      return 1;
  }
  
+# Clamp an integer to the range of CPU shares supported by the booted cgroup version.
+#
+# With cgroupv2 the value ends up in `cpu.weight`, which only accepts `[1..10000]`, so it is
+# clamped on both ends. With cgroupv1 only the lower bound of 2 is enforced here.
+#
+# Returns the version-specific default (100 for cgroupv2, 1024 for cgroupv1) if called with an
+# undefined value.
+#
+# This is a function, not a method!
+sub clamp_cpu_shares {
+    my ($shares) = @_;
+
+    my $is_cgroupv2 = cgroup_mode() == 2;
+
+    return $is_cgroupv2 ? 100 : 1024 if !defined($shares);
+
+    if ($is_cgroupv2) {
+       $shares = 10000 if $shares >= 10000; # v1 can be higher, so clamp v2 there
+       $shares = 1 if $shares < 1; # `cpu.weight` has a minimum of 1, lower values get rejected
+    } else {
+       $shares = 2 if $shares < 2; # v2 can be lower, so clamp v1 there
+    }
+    return $shares;
+}
+
  # Change the cpu "shares" for a container.
  #
  # In cgroupv1 we used a value in `[0..500000]` with a default of 1024.
@@ -490,8 +520,12 @@ sub change_cpu_quota {
  # It is left to the user to figure this out for now.
  #
  # Dies on error (including a not-running or currently-shutting-down guest).
+#
+# NOTE: if you add a new param during 7.x you need to break older pve-container/qemu-server versions
+#  that previously passed a `$cgroupv1_default`, which got removed due to being ignored anyway.
+#  Otherwise you risk that an old module bogusly passes some cgroup default as your new param.
  sub change_cpu_shares {
-    my ($self, $shares, $cgroupv1_default) = @_;
+    my ($self, $shares) = @_;
  
      my ($path, $ver) = $self->get_path('cpu', 1);
      if (!defined($path)) {
@@ -503,7 +537,7 @@ sub change_cpu_shares {
         PVE::ProcFSTools::write_proc_entry("$path/cpu.weight", $shares);
      } elsif ($ver == 1) {
         $shares //= 1024;
-       PVE::ProcFSTools::write_proc_entry("$path/cpu.shares", $shares // $cgroupv1_default);
+       PVE::ProcFSTools::write_proc_entry("$path/cpu.shares", $shares);
      } else {
         die "bad cgroup version: $ver\n";
      }