]>
Commit | Line | Data |
---|---|---|
deaf7667 AD |
1 | #!/usr/bin/perl |
2 | ||
b056d074 DM |
3 | package lxc_pve_prestart_hook; |
4 | ||
deaf7667 AD |
5 | use strict; |
6 | use warnings; | |
4ed2b825 | 7 | |
3bd97c10 | 8 | use Fcntl qw(O_DIRECTORY :mode); |
deaf7667 | 9 | use File::Path; |
3bd97c10 | 10 | use POSIX; |
deaf7667 | 11 | |
36def186 | 12 | use PVE::CGroup; |
deaf7667 | 13 | use PVE::Cluster; |
0a49c44e | 14 | use PVE::LXC::Config; |
c9a5774b | 15 | use PVE::LXC::Setup; |
0a49c44e WB |
16 | use PVE::LXC::Tools; |
17 | use PVE::LXC; | |
f7073b99 | 18 | use PVE::SafeSyslog; |
0a49c44e | 19 | use PVE::Storage; |
3bd97c10 WB |
20 | use PVE::Syscall qw(:fsmount); |
21 | use PVE::Tools qw(AT_FDCWD O_PATH); | |
deaf7667 | 22 | |
c717bffb TL |
# Lazily opened write handle for the container's warnings file. It is shared by
# all log_warn() calls made in this process and only set once open() succeeded.
my $WARNFD;

# Record a warning for container $vmid.
#
# The first call opens (and truncates) "/run/pve/ct-$vmid.warnings"; every call
# writes "$message\n" to that file. Logging is best effort: when the file
# cannot be opened, the problem and the message are emitted via warn() instead
# of being lost, and the hook is not aborted. A later call retries the open.
sub log_warn {
    my ($vmid, $message) = @_;

    if (!defined($WARNFD)) {
        my $path = "/run/pve/ct-${vmid}.warnings";

        # Open into a temporary so that a failed open() does not leave a
        # defined-but-unusable handle behind in $WARNFD.
        my $fh;
        if (!open($fh, '>', $path)) {
            warn "unable to open '$path' for writing - $!\n";
            warn "$message\n";
            return;
        }
        $WARNFD = $fh;
    }

    print {$WARNFD} "$message\n";
}
32 | ||
0a49c44e WB |
# Register the 'pre-start' hook body for the 'lxc' hook type.
#
# The callback receives the container id ($vmid) and a hash of hook variables
# ($vars) of which only ROOTFS_PATH is used here. It
#   - consumes the one-shot skiplock flag file,
#   - requires cluster quorum and validates the config lock,
#   - removes leftover cgroups and stale state files,
#   - mounts all configured volumes below the rootfs path,
#   - runs the distribution specific pre-start setup, and
#   - writes the list of quota-enabled block devices to the devices file.
# Most helpers called here are expected to die on error, which aborts the hook.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The flag file is a one-shot request to skip the lock check, so it is
    # removed as soon as it has been seen.
    # Note: no 'my ... if ...' here, perlsyn documents that form as undefined.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    # Nothing to do for containers without a config file.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # A container holding the 'mounted' lock may be started; any other lock is
    # checked unless skiplock was requested.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # Start from a clean device list; it is rewritten at the end if needed.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Errors and output are deliberately ignored: nothing may be mounted.
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    # Per-volume callback, chosen once depending on kernel capabilities.
    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            # Only quota-enabled volumes end up in the device list.
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        # Stays undef until the rootfs has been mounted, afterwards it holds
        # the mount fd of the rootfs.
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            # Only quota-enabled volumes end up in the device list.
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    # On a pure cgroupv2 host, warn (but do not fail) if the container setup
    # reports that it cannot cope with a unified hierarchy.
    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
                ." environment! Please see documentation -> container -> cgroup version.");
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    # Write one "b:<major>:<minor>:<path>" line per collected block device.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            # Skip entries that vanished or are not block devices.
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
51ae28ec WB |
160 | |
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the "lxc/$vmid" and "lxc.monitor/$vmid" directories of container
# $vmid below every relevant cgroup hierarchy root:
#   - "/sys/fs/cgroup" when PVE::CGroup::cgroup_mode() reports 2,
#   - otherwise one "/sys/fs/cgroup/<controller>" per v1 controller, plus
#     "/sys/fs/cgroup/unified" if a v2 hierarchy is reported as well.
# Dies if rmdir_recursive() fails for any of them.
#
# Note: declared without a prototype on purpose. The sub is called before its
# definition, so a prototype could not be applied to that call anyway and only
# causes a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    my @hierarchy_roots;
    if (PVE::CGroup::cgroup_mode() == 2) {
        push @hierarchy_roots, '/sys/fs/cgroup';
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        foreach my $controller (keys %$v1) {
            my $dirname = $controller;
            $dirname =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            push @hierarchy_roots, "/sys/fs/cgroup/$dirname";
        }

        push @hierarchy_roots, '/sys/fs/cgroup/unified' if $v2;
    }

    # Per hierarchy: first the container's payload cgroup, then the monitor's.
    foreach my $root (@hierarchy_roots) {
        foreach my $group ('lxc', 'lxc.monitor') {
            rmdir_recursive("$root/$group/$vmid");
        }
    }
}
186 | ||
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all directories nested below it.
# Only directories are removed (via rmdir), other entries are left alone, so
# the final rmdir fails if any non-directory entry remains.
# A $path that does not exist is silently ignored; every other failure to open
# or remove a directory is fatal.
sub rmdir_recursive {
    my ($path) = @_;

    opendir(my $dir_handle, $path) or do {
        # A missing directory means there is nothing to clean up.
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    };

    # Depth first: empty out all sub directories before removing $path itself.
    while (defined(my $name = readdir($dir_handle))) {
        next if $name eq '.' or $name eq '..';
        my $child = "$path/$name";
        rmdir_recursive($child) if -d $child;
    }

    rmdir($path) or die "failed to remove directory '$path': $!\n";
}
206 | } |