]> git.proxmox.com Git - pve-container.git/blame - src/lxc-pve-prestart-hook
prestart-hook: detect cgroupv2 incompatible systemd version
[pve-container.git] / src / lxc-pve-prestart-hook
CommitLineData
deaf7667
AD
1#!/usr/bin/perl
2
b056d074
DM
3package lxc_pve_prestart_hook;
4
deaf7667
AD
5use strict;
6use warnings;
4ed2b825 7
3bd97c10 8use Fcntl qw(O_DIRECTORY :mode);
deaf7667 9use File::Path;
3bd97c10 10use POSIX;
deaf7667 11
36def186 12use PVE::CGroup;
deaf7667 13use PVE::Cluster;
0a49c44e 14use PVE::LXC::Config;
c9a5774b 15use PVE::LXC::Setup;
0a49c44e
WB
16use PVE::LXC::Tools;
17use PVE::LXC;
f7073b99 18use PVE::SafeSyslog;
0a49c44e 19use PVE::Storage;
3bd97c10
WB
20use PVE::Syscall qw(:fsmount);
21use PVE::Tools qw(AT_FDCWD O_PATH);
deaf7667 22
0a49c44e
WB
# Register the LXC 'pre-start' hook for a container.
#
# The callback receives the container ID ($vmid) and the hook variables
# ($vars, of which only ROOTFS_PATH is read here). It
#   - consumes the optional skiplock flag file and checks quorum and locks,
#   - removes leftover cgroups, the reboot trigger and the old device list,
#   - sets up every configured volume via the $setup_mountpoint callback,
#   - runs the PVE::LXC::Setup pre-start hook, and
#   - writes the quota-enabled block devices to /var/lib/lxc/$vmid/devices.
#
# It returns early (undef) when the container has no config file on this node.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # NOTE: do not write `my $skiplock = 1 if ...` here, the behaviour of a
    # `my` combined with a statement modifier is undefined (see perlsyn).
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped when explicitly requested via the flag file
    # or when the config carries the 'mounted' lock.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # The device list is rebuilt from scratch below.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort on purpose: output is discarded and a failure is ignored.
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        # Stays undef until the first volume (the rootfs) has been inserted.
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    # Only logged, the start is not aborted because of this.
    if (PVE::CGroup::cgroup_mode() == 2 && !$lxc_setup->unified_cgroupv2_support()) {
        syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
    }

    if (@$devices) {
        # One "b:<major>:<minor>:<path>" line per block device; entries that
        # cannot be stat'ed or are not block devices are skipped.
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
51ae28ec
WB
148
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the 'lxc/$vmid' and 'lxc.monitor/$vmid' cgroup directories of the
# given container below /sys/fs/cgroup, for whichever cgroup layout is active.
# Dies if a directory cannot be opened or removed (see rmdir_recursive).
#
# NOTE: deliberately declared without a prototype. The only caller is compiled
# before this definition, so a prototype would never be applied and would only
# produce a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        my @controllers_cgv1 = keys %$v1;
        foreach my $controller (@controllers_cgv1) {
            $controller =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc.monitor/$vmid");
        }

        # A true $v2 here selects the additional 'unified' hierarchy.
        if ($v2) {
            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
        }
    }
}
174
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all of its sub directories.
# A $path that does not exist is silently accepted. Non-directory entries are
# never touched, so the final rmdir() fails (and we die) if any are present.
sub rmdir_recursive {
    my ($path) = @_;

    opendir(my $dir_handle, $path) or do {
        # Nothing to clean up if the directory is already gone.
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    };

    # Depth first: get rid of all children before removing $path itself.
    while (defined(my $name = readdir($dir_handle))) {
        next if $name =~ /\A\.{1,2}\z/;
        my $child = "$path/$name";
        rmdir_recursive($child) if -d $child;
    }

    rmdir($path) or die "failed to remove directory '$path': $!\n";
}