]>
Commit | Line | Data |
---|---|---|
86dff11c AD |
1 | # cgroup handler |
2 | # | |
3 | # This package should deal with figuring out the right cgroup path for a | |
4 | # container (via the command socket), reading and writing cgroup values, and | |
5 | # handling cgroup v1 & v2 differences. | |
6 | # | |
7 | # Note that the long term plan is to have resource management functions instead of | |
8 | # dealing with cgroup files on the outside. | |
9 | ||
10 | package PVE::CGroup; | |
11 | ||
12 | use strict; | |
13 | use warnings; | |
14 | ||
15 | use IO::File; | |
16 | use IO::Select; | |
17 | use POSIX qw(); | |
18 | ||
19 | use PVE::ProcFSTools; | |
20 | use PVE::Tools qw( | |
21 | file_get_contents | |
22 | file_read_firstline | |
23 | ); | |
24 | ||
86dff11c AD |
# Constructor.
#
# We don't want to do a command socket round trip for every cgroup read/write,
# so the cgroup functions work on an instance of this package, which is where
# a container's paths can be cached.
#
# LXC keeps separate paths by controller (although they're normally all the
# same, in our case anyway), so caching has to happen per controller as well.
#
# Parameters:
#   $class - the class to bless the new instance into
#   $vmid  - ID of the guest, stored under the `vmid` key
#
# Returns the new blessed instance.
sub new {
    my ($class, $vmid) = @_;

    return bless { vmid => $vmid }, $class;
}
38 | ||
# Get the v1 controller list by parsing `/proc/self/cgroup`.
#
# Every line of that file has the form `ID:CONTROLLERS:PATH`, where CONTROLLERS
# is a comma separated list. A line with an empty controller list is counted
# as the unified (cgroupv2) hierarchy.
#
# In list context returns a set (hash mapping names to `1`) of cgroupv1
# controllers plus a boolean telling whether a unified hierarchy exists. In any
# other context only the set is returned.
my sub get_v1_controllers {
    my %v1_set;
    my $have_unified = 0;

    my $content = PVE::Tools::file_get_contents('/proc/self/cgroup');
    # In list context a /g match yields the capture of every matching line.
    for my $names ($content =~ /^\d+:([^:\n]*):.*$/gm) {
        if (!length($names)) {
            $have_unified = 1;
            next;
        }
        $v1_set{$_} = 1 for split(/,/, $names);
    }

    return \%v1_set if !wantarray;
    return (\%v1_set, $have_unified);
}
57 | ||
# Get the set of v2 controllers from the `cgroup.controllers` file.
#
# `/sys/fs/cgroup/cgroup.controllers` is tried first. If reading it fails or
# yields a false value, `/sys/fs/cgroup/unified/cgroup.controllers` is tried
# instead. Read errors are swallowed.
#
# Returns a set (hash mapping names to `1`), or undef if the lookup ends
# without defined file content.
my sub get_v2_controllers {
    my $content;
    for my $file (
        '/sys/fs/cgroup/cgroup.controllers',
        '/sys/fs/cgroup/unified/cgroup.controllers',
    ) {
        $content = eval { file_get_contents($file) };
        last if $content;
    }
    return undef if !defined($content);

    # It's a simple space separated list:
    my %set = map { $_ => 1 } split(/\s+/, $content);
    return \%set;
}
67 | ||
# Cache for `get_cgroup_controllers`: `[ $v1_set, $v2_set ]` once populated.
my $CGROUP_CONTROLLERS = undef;
# Get a list of controllers enabled in each cgroup subsystem.
#
# This is a more complete version of `PVE::LXC::get_cgroup_subsystems`.
#
# The controllers are looked up on the first call only; later calls return the
# cached result.
#
# Returns 2 sets (hashes mapping controller names to `1`), one for each cgroup
# version. The cgroupv2 entry is whatever `get_v2_controllers` returned, which
# may be undef.
sub get_cgroup_controllers() {
    $CGROUP_CONTROLLERS //= do {
        my ($v1_set) = get_v1_controllers();
        my $v2_set = get_v2_controllers();
        [$v1_set, $v2_set];
    };

    return @$CGROUP_CONTROLLERS;
}
85 | ||
# Cache for `cgroup_mode`.
my $CGROUP_MODE = undef;
# Figure out which cgroup mode we're operating under:
#
# For this we check the file system type of `/sys/fs/cgroup` as it may well be
# possible that some additional cgroupv1 mount points have been created by
# tools such as `systemd-nspawn`, or manually.
#
# Returns 1 for what we consider the hybrid layout, 2 for what we consider the
# unified layout. The result is cached after the first successful lookup.
#
# Dies with "unknown cgroup mode" if `/sys/fs/cgroup` is not among the mounts.
#
# NOTE: To fully support a hybrid layout it is better to use functions like
# `cpuset_controller_path` and not rely on this value for anything involving
# paths.
#
# This is a function, not a method!
sub cgroup_mode() {
    return $CGROUP_MODE if defined($CGROUP_MODE);

    my $mounts = PVE::ProcFSTools::parse_proc_mounts();
    for my $mount (@$mounts) {
        # Each entry is unpacked as (what, dir, fstype, opts); only the mount
        # point and the file system type matter here.
        my (undef, $mountpoint, $fstype) = @$mount;
        next if $mountpoint ne '/sys/fs/cgroup';

        # The first entry for the mount point decides.
        $CGROUP_MODE = $fstype eq 'cgroup2' ? 2 : 1;
        last;
    }

    die "unknown cgroup mode\n" if !defined($CGROUP_MODE);
    return $CGROUP_MODE;
}
119 | ||
# Cache for `cgroupv2_base_path`.
my $CGROUPV2_PATH = undef;
# Get the base directory of the cgroupv2 hierarchy.
#
# This is `/sys/fs/cgroup` when `cgroup_mode` reports 2 and
# `/sys/fs/cgroup/unified` otherwise. The result is cached.
sub cgroupv2_base_path() {
    $CGROUPV2_PATH //= cgroup_mode() == 2 ? '/sys/fs/cgroup' : '/sys/fs/cgroup/unified';
    return $CGROUPV2_PATH;
}
131 | ||
# Find a cgroup controller and return its path and version.
#
# LXC initializes the unified hierarchy first, so if a controller is
# available via both we favor cgroupv2 here as well.
#
# An undefined `$controller` always resolves to the cgroupv2 base path.
#
# In list context returns the path and the cgroup version (1 or 2), otherwise
# just the path. Returns nothing if the controller is not available.
sub find_cgroup_controller($) {
    my ($controller) = @_;

    my ($v1_set, $v2_set) = get_cgroup_controllers();

    my ($dir, $version);
    if (!defined($controller) || $v2_set->{$controller}) {
        $dir = cgroupv2_base_path();
        $version = 2;
    } elsif ($v1_set->{$controller}) {
        # cgroupv1 controllers live directly below /sys/fs/cgroup.
        $dir = "/sys/fs/cgroup/$controller";
        $version = 1;
    } else {
        return;
    }

    return wantarray ? ($dir, $version) : $dir;
}
156 | ||
# Caches for `cpuset_controller_path`: path and cgroup version of the cpuset
# controller.
my $CG_PATH_CPUSET = undef;
my $CG_VER_CPUSET = undef;
# Find the cpuset cgroup controller.
#
# The lookup happens once, later calls use the cached values. Dies if the
# controller cannot be found.
#
# In list context returns the path and the cgroup version, otherwise just the
# path.
#
# This is a function, not a method!
sub cpuset_controller_path() {
    unless (defined($CG_PATH_CPUSET)) {
        my @found = find_cgroup_controller('cpuset');
        die "failed to find cpuset controller\n" if !@found;
        ($CG_PATH_CPUSET, $CG_VER_CPUSET) = @found;
    }

    return $CG_PATH_CPUSET if !wantarray;
    return ($CG_PATH_CPUSET, $CG_VER_CPUSET);
}
170 | ||
# Get a subdirectory (without the cgroup mount point) for a controller.
#
# Called as `$self->get_subdir($controller, $limiting)`.
#
# This base implementation is only a placeholder and always dies; the actual
# lookup has to be provided by a subclass.
sub get_subdir {
    die "implement in subclass";
}
177 | ||
# Get path and version for a controller.
#
# `$controller` may be `undef`, in which case the cgroupv2 hierarchy is used.
# `$limiting` is passed through to `get_subdir` unchanged.
#
# Returns either just the path, or the path and cgroup version as a tuple.
# Returns undef if the controller is not available or `get_subdir` yields no
# subdirectory.
sub get_path {
    my ($self, $controller, $limiting) = @_;

    # Find the controller before querying the lxc monitor via a socket:
    my @found = find_cgroup_controller($controller);
    return undef if !@found;
    my ($base, $version) = @found;

    my $subdir = $self->get_subdir($controller, $limiting);
    return undef if !$subdir;

    my $full = "$base/$subdir";
    return wantarray ? ($full, $version) : $full;
}
195 | ||
# Convenience method to get the path info of the first existing controller.
#
# The `@controllers` are tried in order via `get_path` (each with the given
# `$limiting`) until one of them yields a defined path.
#
# Returns the same as `get_path`; path and version are undef if none of the
# controllers matched.
sub get_any_path {
    my ($self, $limiting, @controllers) = @_;

    my @hit;
    for my $name (@controllers) {
        @hit = $self->get_path($name, $limiting);
        last if defined($hit[0]);
    }

    my ($path, $ver) = @hit;
    return wantarray ? ($path, $ver) : $path;
}
209 | ||
# Parse a 'Nested keyed' file:
#
# See kernel documentation `admin-guide/cgroup-v2.rst` 4.1.
#
# Every line consists of a key followed by whitespace separated `name=value`
# pairs. Pairs that do not look like `name=value` produce a warning and are
# skipped.
#
# Returns a hash mapping each line's key to a hash of its name/value pairs.
my sub parse_nested_keyed_file($) {
    my ($data) = @_;

    my %parsed;
    for my $line (split(/\n/, $data)) {
        my ($name, @pairs) = split(/\s+/, $line);

        # Register the (possibly empty) entry before looking at its pairs.
        my %fields;
        $parsed{$name} = \%fields;

        for my $pair (@pairs) {
            if ($pair =~ /^([^=]+)=(.*)$/) {
                $fields{$1} = $2;
            } else {
                warn "bad key=value pair in nested keyed file\n";
            }
        }
    }

    return \%parsed;
}
231 | ||
# Parse a 'Flat keyed' file:
#
# See kernel documentation `admin-guide/cgroup-v2.rst` 4.1.
#
# Every line consists of a key, whitespace and the value (the rest of the
# line). Lines of any other shape produce a warning and are skipped.
#
# Returns a hash mapping keys to values.
my sub parse_flat_keyed_file($) {
    my ($data) = @_;

    my %parsed;
    for my $line (split(/\n/, $data)) {
        if ($line =~ /^(\S+)\s+(.*)$/) {
            $parsed{$1} = $2;
            next;
        }
        warn "bad 'key value' pair in flat keyed file\n";
    }

    return \%parsed;
}
247 | ||
# Parse out 'diskread' and 'diskwrite' values from I/O stats for this container.
#
# With cgroupv2 the `rbytes`/`wbytes` fields of all devices listed in `io.stat`
# are summed up. With cgroupv1 the `Read`/`Write` lines of
# `blkio.throttle.io_service_bytes_recursive` are summed up.
#
# Returns a hash with the keys `diskread` and `diskwrite`, or undef if no
# cgroup path could be determined (container not running). Dies on an
# unexpected cgroup version; errors from reading the stat files propagate.
sub get_io_stats {
    my ($self) = @_;

    my $res = {
        diskread => 0,
        diskwrite => 0,
    };

    # With cgroupv1 we have a 'blkio' controller, with cgroupv2 it's just 'io':
    my ($path, $ver) = $self->get_any_path(1, 'io', 'blkio');

    # container not running
    return undef if !defined($path);

    if ($ver == 2) {
        # cgroupv2 environment, io controller enabled
        my $per_device = parse_nested_keyed_file(file_get_contents("$path/io.stat"));
        for my $stats (values %$per_device) {
            # A device line is not required to carry both fields.
            $res->{diskread} += $stats->{rbytes} if $stats->{rbytes};
            $res->{diskwrite} += $stats->{wbytes} if $stats->{wbytes};
        }
    } elsif ($ver == 1) {
        # cgroupv1 environment:
        my $io = file_get_contents("$path/blkio.throttle.io_service_bytes_recursive");
        for my $line (split(/\n/, $io)) {
            # Only the per-device 'Read' and 'Write' lines are of interest.
            next if $line !~ /^\d+:\d+\s+(Read|Write)\s+(\d+)$/;
            my ($type, $bytes) = ($1, $2);
            $res->{diskread} += $bytes if $type eq 'Read';
            $res->{diskwrite} += $bytes if $type eq 'Write';
        }
    } else {
        die "bad cgroup version: $ver\n";
    }

    return $res;
}
296 | ||
# Read utime and stime for this container from the cpuacct cgroup.
# Values are in milliseconds!
#
# With cgroupv2 the microsecond counters `user_usec`/`system_usec` from
# `cpu.stat` are used. With cgroupv1 the `user`/`system` counters from
# `cpuacct.stat` are converted from clock ticks.
#
# Returns a hash with the keys `utime` and `stime`, or undef if no cgroup path
# could be determined or (cgroupv2 only) `cpu.stat` could not be read. Dies on
# an unexpected cgroup version.
sub get_cpu_stat {
    my ($self) = @_;

    my ($path, $ver) = $self->get_any_path(1, 'cpuacct', 'cpu');

    # container not running
    return undef if !defined($path);

    my ($user_ms, $system_ms);
    if ($ver == 2) {
        my $raw = eval { file_get_contents("$path/cpu.stat") };

        # the stat file could not be read:
        return undef if !defined($raw);

        my $stat = parse_flat_keyed_file($raw);
        $user_ms = int($stat->{user_usec} / 1000);
        $system_ms = int($stat->{system_usec} / 1000);
    } elsif ($ver == 1) {
        # cgroupv1 environment, the counters are in clock ticks:
        my $ms_per_tick = 1000 / POSIX::sysconf(POSIX::_SC_CLK_TCK());

        my $stat = parse_flat_keyed_file(file_get_contents("$path/cpuacct.stat"));
        $user_ms = int($stat->{user} * $ms_per_tick);
        $system_ms = int($stat->{system} * $ms_per_tick);
    } else {
        die "bad cgroup version: $ver\n";
    }

    return {
        utime => $user_ms,
        stime => $system_ms,
    };
}
334 | ||
# Parse some memory data from `memory.stat`
#
# With cgroupv2, `mem` is `memory.current` minus the `file` value from
# `memory.stat`, and `swap` is `memory.swap.current`. With cgroupv1, `mem` is
# `memory.usage_in_bytes` minus `total_cache` from `memory.stat`, and `swap` is
# `memory.memsw.usage_in_bytes` minus `memory.usage_in_bytes`.
#
# Returns a hash with the keys `mem` and `swap`, or undef if no cgroup path
# could be determined. Dies on an unexpected cgroup version; errors from
# reading the files propagate.
sub get_memory_stat {
    my ($self) = @_;

    my ($path, $ver) = $self->get_path('memory', 1);

    # container most likely isn't running
    return undef if !defined($path);

    # Read a single-value file and strip its trailing newline.
    my $read_value = sub {
        my ($file) = @_;
        my $value = file_get_contents($file);
        chomp $value;
        return $value;
    };

    my ($mem_used, $swap_used);
    if ($ver == 2) {
        my $current = $read_value->("$path/memory.current");
        my $swap_current = $read_value->("$path/memory.swap.current");
        my $stat = parse_flat_keyed_file(file_get_contents("$path/memory.stat"));

        $mem_used = $current - $stat->{file};
        $swap_used = $swap_current;
    } elsif ($ver == 1) {
        # cgroupv1 environment:
        my $stat = parse_flat_keyed_file(file_get_contents("$path/memory.stat"));
        my $usage = $read_value->("$path/memory.usage_in_bytes");
        my $memsw_usage = $read_value->("$path/memory.memsw.usage_in_bytes");

        $mem_used = $usage - $stat->{total_cache};
        $swap_used = $memsw_usage - $usage;
    } else {
        die "bad cgroup version: $ver\n";
    }

    return {
        mem => $mem_used,
        swap => $swap_used,
    };
}
372 | ||
0bc3dac9 AD |
# Read the pressure statistics (`cpu.pressure`, `memory.pressure` and
# `io.pressure`) of this guest's cgroup.
#
# Returns a hash keyed by `cpu`, `memory` and `io`. Each entry is what
# `PVE::ProcFSTools::parse_pressure` returned for the respective file, or an
# all-zero default if that call returned nothing. If no cgroup path could be
# determined, the all-zero defaults are returned as a whole. Returns undef for
# a cgroupv1 path and dies on any other unexpected cgroup version.
sub get_pressure_stat {
    my ($self) = @_;

    # Every default entry gets its own zeroed set of averages.
    my $zeroed = sub { return { avg10 => 0, avg60 => 0, avg300 => 0 } };

    my $res = {
        cpu => {
            some => $zeroed->(),
        },
        memory => {
            some => $zeroed->(),
            full => $zeroed->(),
        },
        io => {
            some => $zeroed->(),
            full => $zeroed->(),
        },
    };

    my ($path, $version) = $self->get_path(undef, 1);

    # container or VM most likely isn't running, return zero stats
    return $res if !defined($path);

    # v1 controllers do not provide pressure stats
    return undef if $version == 1;

    die "bad cgroup version: $version\n" if $version != 2;

    for my $type (qw(cpu memory io)) {
        my $stats = PVE::ProcFSTools::parse_pressure("$path/$type.pressure");
        $res->{$type} = $stats if $stats;
    }

    return $res;
}
406 | ||
86dff11c AD |
# Change the memory limit for this container.
#
# `$mem_bytes` and `$swap_bytes` are both optional. With cgroupv2 an undefined
# value leaves the respective limit untouched. With cgroupv1 an undefined value
# is replaced by the one derived from the currently configured limits.
#
# Returns 1 on success.
#
# Dies on error (including a not-running or currently-shutting-down guest).
sub change_memory_limit {
    my ($self, $mem_bytes, $swap_bytes) = @_;

    my ($path, $ver) = $self->get_path('memory', 1);
    die "trying to change memory cgroup values: container not running\n"
        if !defined($path);

    if ($ver == 2) {
        PVE::ProcFSTools::write_proc_entry("$path/memory.swap.max", $swap_bytes)
            if defined($swap_bytes);
        PVE::ProcFSTools::write_proc_entry("$path/memory.max", $mem_bytes)
            if defined($mem_bytes);
    } elsif ($ver == 1) {
        # With cgroupv1 we cannot control memory and swap limits separately.
        # This also means that since the two values aren't independent, we need
        # to handle growing and shrinking separately.
        my $path_mem = "$path/memory.limit_in_bytes";
        my $path_memsw = "$path/memory.memsw.limit_in_bytes";

        my $old_mem_bytes = file_get_contents($path_mem);
        my $old_memsw_bytes = file_get_contents($path_memsw);
        chomp($old_mem_bytes, $old_memsw_bytes);

        $mem_bytes //= $old_mem_bytes;
        $swap_bytes //= $old_memsw_bytes - $old_mem_bytes;
        my $memsw_bytes = $mem_bytes + $swap_bytes;

        # Shrinking means we first need to shrink the mem-only limit, as memsw
        # cannot be shrunk below it.
        my @writes = (
            [$path_mem, $mem_bytes],
            [$path_memsw, $memsw_bytes],
        );
        # Growing the limit means growing the combined limit first, then
        # pulling the memory limit up.
        @writes = reverse(@writes) if $memsw_bytes > $old_memsw_bytes;

        PVE::ProcFSTools::write_proc_entry(@$_) for @writes;
    } else {
        die "bad cgroup version: $ver\n";
    }

    # return a truth value
    return 1;
}
454 | ||
# Change the cpu quota for a container.
#
# `$quota` and `$period` are both optional, but a quota cannot be given
# without a period. An undefined quota means "unlimited".
#
# With cgroupv2 the values are written to `cpu.max`: as "$quota $period" if a
# period is given, otherwise only the (unlimited) quota is written, leaving the
# configured period alone. With cgroupv1 an undefined period defaults to
# 100 ms, and both `cpu.cfs_period_us` and `cpu.cfs_quota_us` are written.
#
# Returns 1 on success.
#
# Dies on error (including a not-running or currently-shutting-down guest).
sub change_cpu_quota {
    my ($self, $quota, $period) = @_;

    die "quota without period not allowed\n" if !defined($period) && defined($quota);

    my ($path, $ver) = $self->get_path('cpu', 1);
    if (!defined($path)) {
        die "trying to change cpu quota cgroup values: container not running\n";
    } elsif ($ver == 2) {
        # cgroupv2 environment, an undefined (unlimited) quota is defined as "max"
        # in this interface:
        $quota //= 'max'; # unlimited

        # The quota is always defined at this point, so the period decides
        # what gets written. Without a period the quota can only be 'max' (see
        # the check above), and we're allowed to only write the quota.
        my $value = defined($period) ? "$quota $period" : $quota;
        PVE::ProcFSTools::write_proc_entry("$path/cpu.max", $value);
    } elsif ($ver == 1) {
        $quota //= -1; # default (unlimited)
        $period //= 100_000; # default (100 ms)
        PVE::ProcFSTools::write_proc_entry("$path/cpu.cfs_period_us", $period);
        PVE::ProcFSTools::write_proc_entry("$path/cpu.cfs_quota_us", $quota);
    } else {
        die "bad cgroup version: $ver\n";
    }

    # return a truth value
    return 1;
}
488 | ||
07c10d58 TL |
# Clamp an integer to the supported range of CPU shares from the booted CGroup version
#
# With cgroupv2 values of 10000 and above are reduced to 10000, with cgroupv1
# values below 2 are raised to 2. Anything else is passed through.
#
# Returns the default (100 for cgroupv2, 1024 for cgroupv1) if called with an
# undefined value.
#
# This is a function, it takes the shares value as its only argument.
sub clamp_cpu_shares {
    my ($shares) = @_;

    if (cgroup_mode() == 2) {
        return 100 if !defined($shares);
        # v1 can be higher, so clamp v2 there
        return $shares >= 10000 ? 10000 : $shares;
    }

    return 1024 if !defined($shares);
    # v2 can be lower, so clamp v1 there
    return $shares < 2 ? 2 : $shares;
}
506 | ||
86dff11c AD |
# Change the cpu "shares" for a container.
#
# In cgroupv1 we used a value in `[0..500000]` with a default of 1024.
#
# In cgroupv2 we do not have "shares", we have "weights" in the range
# of `[1..10000]` with a default of 100.
#
# Since the default values don't match when scaling linearly, we use the
# values we get as-is and simply error for values outside of `[1..10000]` in
# cgroupv2.
#
# It is left to the user to figure this out for now.
#
# An undefined `$shares` selects the default of the respective cgroup version.
#
# Returns 1 on success.
#
# Dies on error (including a not-running or currently-shutting-down guest).
#
# NOTE: if you add a new param during 7.x you need to break older
# pve-container/qemu-server versions that previously passed a
# `$cgroupv1_default`, which got removed due to being ignored anyway.
# Otherwise you risk that an old module bogusly passes some cgroup default as
# your new param.
sub change_cpu_shares {
    my ($self, $shares) = @_;

    my ($path, $ver) = $self->get_path('cpu', 1);
    die "trying to change cpu shares/weight cgroup values: container not running\n"
        if !defined($path);

    if ($ver == 1) {
        PVE::ProcFSTools::write_proc_entry("$path/cpu.shares", $shares // 1024);
    } elsif ($ver == 2) {
        # the cgroupv2 documentation defines the default to 100
        my $weight = $shares // 100;
        die "cpu weight (shares) must be in range [1, 10000]\n"
            if $weight < 1 || $weight > 10000;
        PVE::ProcFSTools::write_proc_entry("$path/cpu.weight", $weight);
    } else {
        die "bad cgroup version: $ver\n";
    }

    # return a truth value
    return 1;
}
545 | ||
# Freeze or thaw a container via the cgroupv1 freezer controller.
#
# Parameters:
#   $self            - object providing `get_subdir`
#   $controller_path - path of the freezer controller
#   $freeze          - true to freeze, false to thaw
#
# Writes the requested state to `freezer.state` and then waits until the file
# reports that state. Returns nothing.
#
# Dies if no cgroup subdirectory could be determined (container not running);
# errors from writing or reading the state file propagate.
my sub v1_freeze_thaw {
    my ($self, $controller_path, $freeze) = @_;
    my $path = $self->get_subdir('freezer', 1)
        or die "trying to freeze container: container not running\n";
    $path = "$controller_path/$path/freezer.state";

    my $data = $freeze ? 'FROZEN' : 'THAWED';
    PVE::ProcFSTools::write_proc_entry($path, $data);

    # Here we just poll the freezer.state once per second. The first check
    # happens right away, so no time is lost if the state is already reached.
    while (1) {
        my $state = file_get_contents($path);
        chomp $state;
        last if $state eq $data;

        # Wait before looking again instead of spinning on the file.
        sleep(1);
    }

    return;
}
562 | ||
# Freeze or thaw a container via the cgroupv2 `cgroup.freeze` file.
#
# Parameters:
#   $self            - object providing `get_subdir`
#   $controller_path - base path of the cgroupv2 hierarchy
#   $freeze          - true to freeze, false to thaw
#
# Writes the desired state to `cgroup.freeze` and then waits until the `frozen`
# value in `cgroup.events` matches it.
#
# Dies if no cgroup subdirectory could be determined (container not running),
# or if `cgroup.events` cannot be opened or rewound.
my sub v2_freeze_thaw {
    my ($self, $controller_path, $freeze) = @_;

    my $subdir = $self->get_subdir(undef, 1);
    die "trying to freeze container: container not running\n" if !$subdir;
    my $cgroup_dir = "$controller_path/$subdir";

    my $want_frozen = $freeze ? 1 : 0;

    # cgroupv2 supports poll events on cgroup.events which contains the frozen
    # state. The file is opened before the state change gets requested.
    my $events_file = "$cgroup_dir/cgroup.events";
    my $events = IO::File->new($events_file, 'r')
        or die "failed to open $events_file file: $!\n";
    my $watcher = IO::Select->new($events);

    PVE::ProcFSTools::write_proc_entry("$cgroup_dir/cgroup.freeze", $want_frozen);

    for (;;) {
        # Slurp everything from the current file position on.
        my $raw = do {
            local $/;
            <$events>
        };
        my $state = parse_flat_keyed_file($raw);
        last if $state->{frozen} == $want_frozen;

        # Wait for an exceptional condition on the events file, then start
        # over from the beginning of the file.
        my @ready = $watcher->has_exception();
        if (@ready) {
            seek($events, 0, 0)
                or die "failed to rewind cgroup.events file: $!\n";
        }
    }
}
592 | ||
# Freeze or unfreeze a container.
#
# This will freeze the container at its outer (limiting) cgroup path. We use
# this instead of `lxc-freeze` as `lxc-freeze` from lxc4 will not be able to
# fetch the cgroup path from containers still running on lxc3.
#
# `$freeze` is true to freeze the container and false to thaw it.
sub freeze_thaw {
    my ($self, $freeze) = @_;

    my $freezer_dir = find_cgroup_controller('freezer');
    return v1_freeze_thaw($self, $freezer_dir, $freeze) if defined($freezer_dir);

    # cgroupv2 always has a freezer, there can be both cgv1 and cgv2
    # freezers, but we'll prefer v1 when it's available as that's what lxc
    # does as well...
    return v2_freeze_thaw($self, cgroupv2_base_path(), $freeze);
}
611 | ||
612 | 1; |