]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC/CGroup.pm
207c0c1865b56a790b8d31cb421a1b9ab8ea787e
[pve-container.git] / src / PVE / LXC / CGroup.pm
1 # cgroup handler
2 #
3 # This package should deal with figuring out the right cgroup path for a
4 # container (via the command socket), reading and writing cgroup values, and
5 # handling cgroup v1 & v2 differences.
6 #
# Note that the long term plan is to have resource management functions instead
# of dealing with cgroup files on the outside.
9
10 package PVE::LXC::CGroup;
11
12 use strict;
13 use warnings;
14
15 use PVE::Tools qw(
16 file_get_contents
17 file_read_firstline
18 );
19
20 use PVE::LXC::Command;
21
22 # We don't want to do a command socket round trip for every cgroup read/write,
23 # so any cgroup function needs to have the container's path cached, so this
24 # package has to be instantiated.
25 #
# LXC keeps separate paths by controller (although they're normally all the
# same, in our case anyway), so we cache them by controller as well.
# Create a cgroup handle for the container identified by `$vmid`.
#
# Only the vmid is stored here; cgroup paths are looked up (and cached on the
# object) lazily by the accessor functions.
sub new {
    my ($class, $vmid) = @_;

    return bless { vmid => $vmid }, $class;
}
35
# Cached mount point of the cpuset controller, filled in on first use.
my $CPUSET_BASE = undef;

# Find the cpuset cgroup controller.
#
# Probes the known mount points in order and returns the first directory which
# contains the expected "effective cpus" file. The result is cached for the
# lifetime of the process. Dies if none of the candidates exist.
#
# This is a function, not a method!
sub cpuset_controller_path() {
    return $CPUSET_BASE if defined($CPUSET_BASE);

    # Each candidate is a [ mount point, probe file ] pair.
    my @candidates = (
        # legacy cpuset cgroup:
        ['/sys/fs/cgroup/cpuset', 'cpuset.effective_cpus'],
        # pure cgroupv2 environment:
        ['/sys/fs/cgroup', 'cpuset.cpus.effective'],
        # hybrid, with cpuset moved to cgroupv2
        ['/sys/fs/cgroup/unified', 'cpuset.cpus.effective'],
    );

    foreach my $candidate (@candidates) {
        my ($base, $probe) = @$candidate;
        next unless -f "$base/$probe";

        $CPUSET_BASE = $base;
        return $CPUSET_BASE;
    }

    die "failed to find cpuset controller\n";
}
59
# Cached cgroup mode (1 or 2), determined on first use.
my $CGROUP_MODE = undef;

# Figure out which cgroup mode we're operating under:
#
# Returns 1 if cgroupv1 controllers exist (hybrid or legacy mode), and 2 in a
# cgroupv2-only environment. Dies if neither can be determined. A successfully
# detected mode is cached, a failed detection is retried on the next call.
#
# This is a function, not a method!
sub cgroup_mode() {
    return $CGROUP_MODE if defined($CGROUP_MODE);

    my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();

    if (keys %$v1) {
        # hybrid or legacy mode
        $CGROUP_MODE = 1;
        return $CGROUP_MODE;
    }

    if ($v2) {
        $CGROUP_MODE = 2;
        return $CGROUP_MODE;
    }

    die "unknown cgroup mode\n";
}
81
82 # Get a subdirectory (without the cgroup mount point) for a controller.
83 #
84 # If `$controller` is `undef`, get the unified (cgroupv2) path.
85 #
86 # Note that in cgroup v2, lxc uses the activated controller names
87 # (`cgroup.controllers` file) as list of controllers for the unified hierarchy,
88 # so this returns a result when a `controller` is provided even when using
89 # a pure cgroupv2 setup.
# Look up (and cache) the cgroup subdirectory of this container.
#
# Parameters:
#   $self       - the cgroup object, used for its `vmid` and its path cache
#   $controller - controller name, or `undef` for the unified hierarchy
#   $limiting   - if true, the 'limit' path is requested instead of the 'ns' one
#
# Returns the path reported by lxc, or `undef` if lxc did not return one.
# Dies if the reported path contains '..'.
my sub get_subdir {
    my ($self, $controller, $limiting) = @_;

    # One cache entry per controller, with separate 'limit' and 'ns' slots.
    my $cache = ($self->{controllers}->{ $controller || 'unified' } //= {});
    my $slot = $limiting ? 'limit' : 'ns';

    if (defined(my $cached = $cache->{$slot})) {
        return $cached;
    }

    my $raw = PVE::LXC::Command::get_cgroup_path($self->{vmid}, $controller, $limiting);
    return undef if !$raw;

    # Refuse anything which could escape the cgroup mount point.
    die "lxc returned suspicious path: '$raw'\n" if $raw =~ /\.\./;

    # untaint:
    my ($clean) = ($raw =~ /^(.*)$/s);

    $cache->{$slot} = $clean;

    return $clean;
}
117
118 # Get a path for a controller.
119 #
120 # `$controller` may be `undef`, see get_subdir above for details.
# Build the absolute cgroup directory of this container for `$controller`.
#
# Returns `undef` if no subdirectory could be determined for the controller.
sub get_path {
    my ($self, $controller) = @_;

    my $subdir = get_subdir($self, $controller);
    return undef if !$subdir;

    # We assume the main mount point to be in its standard location.
    my $mount = '/sys/fs/cgroup';

    # Outside of a pure cgroupv2 setup every hierarchy has its own directory
    # below the main mount point.
    if (cgroup_mode() != 2) {
        $mount .= defined($controller) ? "/$controller" : '/unified';
    }

    return "$mount/$subdir";
}
132
133 # Parse a 'Nested keyed' file:
134 #
135 # See kernel documentation `admin-guide/cgroup-v2.rst` 4.1.
# Parse the contents of a 'Nested keyed' file into a hash of hashes.
#
# Each non-empty line has the form `KEY SUB0=VAL0 SUB1=VAL1 ...` and becomes
# `$res->{KEY} = { SUB0 => VAL0, SUB1 => VAL1, ... }`. A field which is not of
# the form `name=value` is skipped with a warning.
#
# Parameters:
#   $data - the complete file contents as a single string
#
# Returns a hash reference as described above (empty for empty input).
my sub parse_nested_keyed_file($) {
    my ($data) = @_;

    my $res = {};

    foreach my $line (split(/\n/, $data)) {
        my ($key, @pairs) = split(/\s+/, $line);

        # Blank lines yield no fields at all, there is nothing to record.
        next if !defined($key);

        my $d = ($res->{$key} = {});

        foreach my $pair (@pairs) {
            if (my ($name, $value) = ($pair =~ /^([^=]+)=(.*)$/)) {
                $d->{$name} = $value;
            } else {
                warn "bad key=value pair in nested keyed file\n";
            }
        }
    }

    # Without an explicit return the caller would receive the (unspecified)
    # value of the loop above instead of the parsed data.
    return $res;
}
153
154 # Get I/O stats for a container.
# Get I/O stats for a container.
#
# Returns a hash reference with the accumulated number of bytes read and
# written over all devices:
#
#   { diskread => BYTES, diskwrite => BYTES }
#
# Returns `undef` if no cgroup path is available, i.e. the container is not
# running or (on cgroupv2) the io controller is not enabled.
sub get_io_stats {
    my ($self) = @_;

    my $res = {
        diskread => 0,
        diskwrite => 0,
    };

    if (cgroup_mode() == 2) {
        if (defined(my $path = $self->get_path('io'))) {
            # cgroupv2 environment, io controller enabled
            my $io_stat = file_get_contents("$path/io.stat");

            my $data = parse_nested_keyed_file($io_stat);
            foreach my $dev (keys %$data) {
                my $stats = $data->{$dev};
                if (my $bytes = $stats->{rbytes}) {
                    $res->{diskread} += $bytes;
                }
                if (my $bytes = $stats->{wbytes}) {
                    # written bytes belong to the write counter, not the read one
                    $res->{diskwrite} += $bytes;
                }
            }
        } else {
            # io controller not enabled or container not running
            return undef;
        }
    } elsif (defined(my $path = $self->get_path('blkio'))) {
        # cgroupv1 environment:
        my $io = file_get_contents("$path/blkio.throttle.io_service_bytes_recursive");
        foreach my $line (split(/\n/, $io)) {
            # lines look like "MAJOR:MINOR Read|Write BYTES", others are ignored
            if (my ($type, $bytes) = ($line =~ /^\d+:\d+\s+(Read|Write)\s+(\d+)$/)) {
                $res->{diskread} += $bytes if $type eq 'Read';
                $res->{diskwrite} += $bytes if $type eq 'Write';
            }
        }
    } else {
        # container not running
        return undef;
    }

    return $res;
}
198
199 1;