]> git.proxmox.com Git - pve-container.git/blob - src/lxc-pve-prestart-hook
config: cpuunits: drop description for outdated special value
[pve-container.git] / src / lxc-pve-prestart-hook
1 #!/usr/bin/perl
2
3 package lxc_pve_prestart_hook;
4
5 use strict;
6 use warnings;
7
8 use Fcntl qw(O_DIRECTORY :mode);
9 use File::Path;
10 use POSIX;
11
12 use PVE::CGroup;
13 use PVE::Cluster;
14 use PVE::LXC::Config;
15 use PVE::LXC::Setup;
16 use PVE::LXC::Tools;
17 use PVE::LXC;
18 use PVE::RESTEnvironment;
19 use PVE::SafeSyslog;
20 use PVE::Storage;
21 use PVE::Syscall qw(:fsmount);
22 use PVE::Tools qw(AT_FDCWD O_PATH);
23
# Lazily opened handle for the per-container warnings file; shared by all
# log_warn() calls made during this hook run.
my $WARNFD;

# Record a warning for container $vmid.
#
# The message is appended (newline-terminated) to /run/pve/ct-<vmid>.warnings,
# which is truncated the first time it is opened in this process. If that file
# cannot be opened, the message is emitted on STDERR instead of being lost, and
# the open is retried on the next call.
sub log_warn {
    my ($vmid, $message) = @_;

    if (!defined($WARNFD)) {
        my $path = "/run/pve/ct-${vmid}.warnings";
        if (open(my $fh, '>', $path)) {
            $WARNFD = $fh;
        } else {
            # Best effort only: a missing warnings file must not abort the
            # hook, but the warning itself should still reach the log.
            warn "unable to open '$path' - $!\n";
            warn "$message\n";
            return;
        }
    }
    print $WARNFD "$message\n";
}
33
# Register the 'pre-start' hook body with PVE::LXC::Tools::lxc_hook().
#
# The callback receives the container id ($vmid) and a hash of hook variables
# ($vars, of which only ROOTFS_PATH is used here). It:
#   - consumes the skiplock flag file and checks the config lock,
#   - removes leftover cgroups, the reboot trigger and the old device list,
#   - mounts all configured volumes below the root directory,
#   - runs the distribution specific pre-start setup,
#   - writes the list of quota-enabled block devices to the devices file.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The flag file is a one-shot request to skip the lock check below, so it
    # is removed as soon as it has been seen.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    # Plain assignment: 'my $x = ... if COND' has undefined behaviour (perlsyn).
    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    PVE::RESTEnvironment->setup_default_cli_env();

    # Nothing to do when there is no config file for this id.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped on request and when the lock is 'mounted'.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # Start from a clean device list; it is rewritten at the end if needed.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Errors and output are deliberately ignored (best effort).
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            # Only quota-enabled volumes end up in the device list.
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            # Only quota-enabled volumes end up in the device list.
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
                ." environment! Please see documentation -> container -> cgroup version.");
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    if (@$devices) {
        # One "b:<major>:<minor>:<path>" line per collected block device;
        # entries that cannot be stat()ed or are not block devices are skipped.
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
163
# Remove the leftover 'lxc/<vmid>' and 'lxc.monitor/<vmid>' cgroup directories
# of container $vmid.
#
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal. lxc is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Declared without a prototype: the only call site is compiled before this
# definition, so a prototype could not be checked there and would only trigger
# a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        # cgroup mode 2: everything lives directly below /sys/fs/cgroup.
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        # One hierarchy per v1 controller.
        my @controllers_cgv1 = keys %$v1;
        foreach my $controller (@controllers_cgv1) {
            $controller =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc.monitor/$vmid");
        }

        # When v2 controllers are reported as well, also clean the 'unified'
        # hierarchy.
        if ($v2) {
            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
        }
    }
}
189
# Remove the directory $path together with all directories below it.
#
# Only directories are removed; any other entry is left alone, so the final
# rmdir() fails (and this dies) if such an entry keeps a directory non-empty.
# Symbolic links are never followed, so nothing outside of $path is touched.
# A directory that does not exist (any more) counts as already removed.
#
# Dies if a directory cannot be opened or removed for any other reason.
#
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -l does an lstat(); '_' reuses that result, so a symlink pointing to
        # a directory is not mistaken for a directory and recursed into.
        next if -l $next || !(-d _);
        rmdir_recursive($next);
    }
    # Do not keep one handle open per recursion level.
    closedir($dh);

    # Tolerate concurrent removal, consistent with the opendir() check above.
    rmdir($path)
        or $!{ENOENT}
        or die "failed to remove directory '$path': $!\n";
}