]> git.proxmox.com Git - pve-container.git/blob - src/lxc-pve-prestart-hook
bump pve-common dep to 8.1.0
[pve-container.git] / src / lxc-pve-prestart-hook
1 #!/usr/bin/perl
2
3 package lxc_pve_prestart_hook;
4
5 use strict;
6 use warnings;
7
8 use Fcntl qw(O_DIRECTORY :mode);
9 use File::Basename;
10 use File::Path;
11 use POSIX;
12
13 use PVE::CGroup;
14 use PVE::Cluster;
15 use PVE::LXC::Config;
16 use PVE::LXC::Setup;
17 use PVE::LXC::Tools;
18 use PVE::LXC;
19 use PVE::RESTEnvironment;
20 use PVE::SafeSyslog;
21 use PVE::Storage;
22 use PVE::Syscall qw(:fsmount);
23 use PVE::Tools qw(AT_FDCWD O_PATH);
24
# Lazily opened handle for the per-container warnings file; shared by all
# log_warn() calls made by this process.
my $WARNFD;

# Append a warning line to "/run/pve/ct-<vmid>.warnings".
#
# Parameters:
#   $vmid    - container ID, used to build the warnings file name
#   $message - warning text; a trailing newline is added
#
# The file is opened (and truncated) on the first successful call only. If it
# cannot be opened, the problem and the message itself are emitted via warn()
# so the warning is not silently lost; the open is retried on the next call.
sub log_warn {
    my ($vmid, $message) = @_;

    if (!defined($WARNFD)) {
        my $path = "/run/pve/ct-${vmid}.warnings";
        # Open into a temporary lexical: open() autovivifies its handle
        # argument even when it fails, which would leave $WARNFD defined but
        # unusable.
        if (open(my $fh, '>', $path)) {
            $WARNFD = $fh;
        } else {
            warn "failed to open '$path' for writing: $!\n";
            warn "$message\n";
            return;
        }
    }

    print {$WARNFD} "$message\n";
}
34
# Register the 'pre-start' hook callback through PVE::LXC::Tools::lxc_hook().
#
# The callback receives the container ID ($vmid) and a hash of hook variables
# ($vars, of which only ROOTFS_PATH is used here). In order, it:
#   - consumes the skiplock flag file and checks quorum and the config lock,
#   - removes leftover cgroup directories and stale state files,
#   - stages and mounts all configured volumes (rootfs first),
#   - creates device nodes for passthrough devices on a dedicated tmpfs,
#   - runs the PVE::LXC::Setup pre-start hook,
#   - writes the device lists to /var/lib/lxc/<vmid>/passthrough/{mounts,devices}.
# Any failure is raised with die().
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The flag file is a one-shot marker: remember whether it existed and
    # remove it right away.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = -e $skiplock_flag_fn;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    PVE::RESTEnvironment->setup_default_cli_env();

    # Nothing to prepare when there is no config file for this vmid.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped when skiplock was requested or when the config
    # carries the 'mounted' lock.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # Delete the old device list file
    # in case it was left over from a previous version of pve-container.
    unlink("/var/lib/lxc/$vmid/devices");

    # Block devices backing quota-enabled mount points, collected below.
    my $devices = [];

    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: errors (e.g. nothing mounted) are deliberately ignored.
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    # Set by the first (rootfs) mount; all later mounts are made relative to it.
    my $rootdir_fd = undef;
    my $setup_mountpoint = sub {
        my ($opt, $mountpoint) = @_;

        my $dir = PVE::LXC::get_staging_mount_path($opt);
        my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
            $mountpoint,
            $dir,
            $storage_cfg,
            undef,
            $rootuid,
            $rootgid,
        );

        my ($dest_dir, $dest_base_fd);
        if ($rootdir_fd) {
            # Mount relative to the rootdir fd.
            $dest_base_fd = $rootdir_fd;
            $dest_dir = './' . $mountpoint->{mp};
        } else {
            # Assert that 'rootfs' is the first one:
            die "foreach_mount() error\n" if $opt ne 'rootfs';

            # Mount the rootfs absolutely.
            # $rootdir is not controlled by the container, so this is fine.
            sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                or die "failed to open '/': $!\n";
            $dest_dir = $rootdir;
        }

        PVE::LXC::mountpoint_insert_staged(
            $mount_fd,
            $dest_base_fd,
            $dest_dir,
            $opt,
            $rootuid,
            $rootgid,
        );

        # From now on we mount inside our rootfs:
        if (!$rootdir_fd) {
            $rootdir_fd = $mount_fd;
        }

        push @$devices, $dev if $dev && $mountpoint->{quota};
    };

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    # Device passthrough
    # Entries are [host path, stat mode, rdev] triples.
    my $passthrough_devices = [];

    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
    File::Path::make_path($passthrough_dir);
    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k")
        or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!");

    my $setup_passthrough_device = sub {
        my ($key, $device) = @_;

        my $absolute_path = $device->{path};
        my ($mode, $rdev) = (stat($absolute_path))[2, 6];

        die "Could not get mode or device ID of $absolute_path\n"
            if (!defined($mode) || !defined($rdev));

        # Only real device nodes can be passed through; anything else would be
        # replicated by mknod as a non-device and later be listed as type 'c'.
        die "$absolute_path is not a character or block device\n"
            if !S_ISBLK($mode) && !S_ISCHR($mode);

        # Recreate the node below the passthrough tmpfs, mirroring the host path.
        my $passthrough_device_path = $passthrough_dir . $absolute_path;
        File::Path::make_path(dirname($passthrough_device_path));
        PVE::Tools::mknod($passthrough_device_path, $mode, $rdev)
            or die("failed to mknod $passthrough_device_path: $!\n");

        # Use chmod because umask could mess with the access mode on mknod
        my $passthrough_mode = 0660;
        $passthrough_mode = oct($device->{mode}) if defined($device->{mode});
        chmod($passthrough_mode, $passthrough_device_path)
            or die sprintf(
                "failed to chmod %04o %s: %s\n",
                $passthrough_mode, $passthrough_device_path, $!,
            );

        # Set uid and gid of the device node
        # (configured IDs are container-side and get mapped to host IDs).
        my $uid = 0;
        my $gid = 0;
        $uid = $device->{uid} if defined($device->{uid});
        $gid = $device->{gid} if defined($device->{gid});
        $uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map);
        $gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map);
        chown($uid, $gid, $passthrough_device_path)
            or die("failed to chown $uid:$gid $passthrough_device_path: $!\n");

        push @$passthrough_devices, [$absolute_path, $mode, $rdev];
    };

    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
                ." environment! Please see documentation -> container -> cgroup version.");
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    # One "b:<major>:<minor>:<path>" line per quota-enabled block device.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mounts", $devlist);
    }

    # One "<b|c>:<major>:<minor>:<path>" line per passthrough device.
    if (@$passthrough_devices) {
        my $devlist = '';
        for my $dev (@$passthrough_devices) {
            my ($path, $mode, $rdev) = @$dev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
            $devlist .= "$device_type_char:$major:$minor:$path\n";
        }
        PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist);
    }
});
202
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal. lxc is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the "lxc/<vmid>" and "lxc.monitor/<vmid>" cgroup directories: directly
# below /sys/fs/cgroup when PVE::CGroup::cgroup_mode() reports 2, otherwise
# below every v1 controller returned by PVE::CGroup::get_cgroup_controllers()
# and, if a v2 hierarchy is reported as well, below /sys/fs/cgroup/unified.
#
# Parameters:
#   $vmid - container ID
#
# Dies if a directory exists but cannot be removed (see rmdir_recursive()).
#
# NOTE: declared without a prototype on purpose - the only call site precedes
# this definition, so a prototype would never be applied and would only trigger
# a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        foreach my $key (keys %$v1) {
            # Work on a copy so the controller hash keys stay untouched.
            my $controller = $key;
            $controller =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc.monitor/$vmid");
        }

        if ($v2) {
            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
        }
    }
}
228
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all of its sub-directories,
# depth-first. Only directories are removed (rmdir); a directory that still
# contains other entries makes the final rmdir fail.
#
# Parameters:
#   $path - directory to remove
#
# A $path that does not exist (or vanishes before it is removed) is silently
# ignored. Symbolic links are never followed, so nothing outside of $path is
# touched. Dies if the directory cannot be opened or removed for any other
# reason.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    # Read everything up front and release the handle, so that no directory
    # handle is kept open while recursing.
    my @entries = grep { $_ ne '.' && $_ ne '..' } readdir($dh);
    closedir($dh);

    for my $entry (@entries) {
        my $next = "$path/$entry";
        # -d follows symlinks, so rule them out first (-l uses lstat).
        next if -l $next || ! -d $next;
        rmdir_recursive($next);
    }

    if (!rmdir($path)) {
        # Already gone (e.g. removed concurrently) is as good as removed.
        return if $!{ENOENT};
        die "failed to remove directory '$path': $!\n";
    }

    return;
}
248 }