]> git.proxmox.com Git - pve-container.git/blame - src/lxc-pve-prestart-hook
cleanup cgroups in pre-start hook
[pve-container.git] / src / lxc-pve-prestart-hook
CommitLineData
deaf7667
AD
1#!/usr/bin/perl
2
b056d074
DM
3package lxc_pve_prestart_hook;
4
deaf7667
AD
5use strict;
6use warnings;
4ed2b825 7
3bd97c10 8use Fcntl qw(O_DIRECTORY :mode);
deaf7667 9use File::Path;
3bd97c10 10use POSIX;
deaf7667 11
deaf7667 12use PVE::Cluster;
0a49c44e 13use PVE::LXC::Config;
c9a5774b 14use PVE::LXC::Setup;
0a49c44e
WB
15use PVE::LXC::Tools;
16use PVE::LXC;
17use PVE::Storage;
3bd97c10
WB
18use PVE::Syscall qw(:fsmount);
19use PVE::Tools qw(AT_FDCWD O_PATH);
deaf7667 20
0a49c44e
WB
# LXC 'pre-start' hook body.
#
# Runs before the container is started and, in order:
#   * honours and removes the per-container skiplock flag file,
#   * requires cluster quorum and checks the config lock,
#   * removes leftover cgroup directories of a previous run,
#   * activates all volumes and mounts every mount point below the rootfs,
#   * runs the distribution specific pre-start setup,
#   * writes the list of quota-enabled block devices to the 'devices' file.
#
# Callback arguments: the container id and the hash of hook variables (only
# ROOTFS_PATH is read here); the remaining two arguments are ignored.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # NOTE: always initialise unconditionally; `my $x = ... if COND` has
    # undefined behaviour in perl (see perlsyn, "Statement Modifiers").
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    # Nothing to do when there is no config file for this id.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped on request (skiplock flag) and when the config
    # carries the 'mounted' lock.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $vollist = PVE::LXC::Config->get_vm_volumes($conf);
    PVE::Storage::activate_volumes($storage_cfg, $vollist);

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # Start with a fresh device list; it is only written again below when
    # there is at least one quota-enabled device.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: output is discarded and failures are ignored on purpose.
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    # Per mount point callback, invoked as ($opt, $mountpoint).
    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_mountpoint($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    # Record the quota-enabled block devices, one "b:MAJOR:MINOR:PATH" line
    # each. Entries that cannot be stat()ed or are not block devices are
    # skipped silently.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
51ae28ec
WB
145
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the 'lxc/$vmid' (and, for cgroup v2 hierarchies, 'lxc.monitor/$vmid')
# directories below /sys/fs/cgroup for the given container id. Dies when a
# directory exists but cannot be removed (see rmdir_recursive()).
#
# Deliberately declared without a prototype: this sub is called with
# parentheses from the hook callback earlier in this file, before the parser
# has seen this definition, so a prototype could not be applied there and would
# only produce a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::LXC::CGroup::cgroup_mode() == 2) {
        # Pure cgroup v2 layout.
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
        return;
    }

    my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();

    foreach my $controller (keys %$v1) {
        # `name=systemd` is mounted just as `systemd`
        (my $mount_name = $controller) =~ s/^name=//;
        rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc/$vmid");
    }

    if ($v2) {
        rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
    }
}
171
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all directories below it.
#
# Only directories are handled: non-directory entries are left alone, so the
# final rmdir() fails (and this sub dies) if the file system does not allow
# removing a directory that still contains them.
#
# Returns silently if $path does not exist. Dies if the directory cannot be
# opened for another reason or if a directory cannot be removed.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -d follows symlinks, so test for a symlink first: never descend
        # through one, as that would remove directories outside of $path.
        next if -l $next || ! -d $next;
        rmdir_recursive($next);
    }
    closedir($dh);

    rmdir($path) or die "failed to remove directory '$path': $!\n";
}