]>
Commit | Line | Data |
---|---|---|
deaf7667 AD |
1 | #!/usr/bin/perl |
2 | ||
b056d074 DM |
3 | package lxc_pve_prestart_hook; |
4 | ||
deaf7667 AD |
5 | use strict; |
6 | use warnings; | |
4ed2b825 | 7 | |
3bd97c10 | 8 | use Fcntl qw(O_DIRECTORY :mode); |
deaf7667 | 9 | use File::Path; |
3bd97c10 | 10 | use POSIX; |
deaf7667 | 11 | |
36def186 | 12 | use PVE::CGroup; |
deaf7667 | 13 | use PVE::Cluster; |
0a49c44e | 14 | use PVE::LXC::Config; |
c9a5774b | 15 | use PVE::LXC::Setup; |
0a49c44e WB |
16 | use PVE::LXC::Tools; |
17 | use PVE::LXC; | |
f8dcde1b | 18 | use PVE::RESTEnvironment; |
f7073b99 | 19 | use PVE::SafeSyslog; |
0a49c44e | 20 | use PVE::Storage; |
3bd97c10 WB |
21 | use PVE::Syscall qw(:fsmount); |
22 | use PVE::Tools qw(AT_FDCWD O_PATH); | |
deaf7667 | 23 | |
c717bffb TL |
# Lazily opened handle for the per-container warnings file. It stays undefined
# until the file has been opened successfully.
my $WARNFD;

# Record a warning for container $vmid.
#
# On first use the file "/run/pve/ct-<vmid>.warnings" is opened for writing
# (mode '>', i.e. truncating) and the handle is cached in $WARNFD; each call
# then writes "$message\n" to it.
#
# If the file cannot be opened, the message is emitted via warn() instead so
# it is not silently lost, and the next call will retry opening the file.
sub log_warn {
    my ($vmid, $message) = @_;

    if (!defined($WARNFD)) {
        my $path = "/run/pve/ct-${vmid}.warnings";

        # Open into a temporary: open() autovivifies its handle argument, so
        # opening straight into $WARNFD would leave it defined but unusable
        # after a failure and every later call would print to a dead handle.
        my $fh;
        if (!open($fh, '>', $path)) {
            warn "unable to open '$path' for writing - $!\n";
            warn "$message\n";
            return;
        }
        $WARNFD = $fh;
    }

    print {$WARNFD} "$message\n";
}
33 | ||
0a49c44e WB |
# Register the body of the LXC 'pre-start' hook.
#
# The callback receives the container id ($vmid) and a hash of hook variables
# ($vars, of which only ROOTFS_PATH is used here). In order, it:
#   - consumes the skiplock flag file and verifies quorum,
#   - loads the container config and checks its lock state,
#   - removes leftover cgroups, the reboot trigger and the old device list,
#   - mounts (legacy kernels) or stages and moves (new mount API) every volume
#     of the container below the rootfs path,
#   - runs the distribution specific pre-start setup,
#   - writes the list of quota-enabled block devices to the devices file.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The flag file is one-shot: remember that it existed, then remove it.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = -e $skiplock_flag_fn;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    PVE::RESTEnvironment->setup_default_cli_env();

    # Nothing to do if there is no config file for this vmid.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped when skiplock was requested or when the config
    # carries the 'mounted' lock.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # The device list is rebuilt from scratch below; $devices collects the
    # devices of all quota-enabled mount points.
    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: output is discarded and failures are ignored.
    eval {
        PVE::Tools::run_command(['umount', '--recursive', $rootdir], outfunc => sub {}, errfunc => sub {});
    };

    # Callback invoked for every volume as ($opt, $mountpoint); which variant
    # is used depends on whether the new mount API is usable.
    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().
        # $rootdir_fd stays undefined until the rootfs itself has been mounted.
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    # On a cgroupv2 host, warn (warnings file and syslog) when the container
    # setup reports no unified cgroupv2 support; the start is not aborted.
    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
                ." environment! Please see documentation -> container -> cgroup version.");
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    # Write one "b:<major>:<minor>:<path>" line per collected block device;
    # entries that cannot be stat()ed or are not block devices are skipped.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});
51ae28ec WB |
163 | |
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the "lxc/<vmid>" and "lxc.monitor/<vmid>" cgroup directories of the
# container $vmid: directly below /sys/fs/cgroup in cgroupv2 mode, otherwise
# below every v1 controller and, if present, below the "unified" hierarchy.
# Dies if a directory exists but cannot be removed (see rmdir_recursive).
#
# Deliberately declared without a prototype: the only call site is compiled
# before this definition, so a prototype would never be applied there and
# would only trigger a "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    # Remove both per-container directories below one cgroup hierarchy root.
    my $remove_below = sub {
        my ($base) = @_;
        rmdir_recursive("$base/lxc/$vmid");
        rmdir_recursive("$base/lxc.monitor/$vmid");
    };

    if (PVE::CGroup::cgroup_mode() == 2) {
        $remove_below->("/sys/fs/cgroup");
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        foreach my $key (keys %$v1) {
            my $controller = $key;
            $controller =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            $remove_below->("/sys/fs/cgroup/$controller");
        }

        if ($v2) {
            $remove_below->("/sys/fs/cgroup/unified");
        }
    }
}
189 | ||
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all directories below it.
#
# Only directories are removed (via rmdir); other entries are left alone, so
# the final rmdir fails if any remain. Symbolic links are never followed,
# which keeps the removal confined to the tree rooted at $path.
#
# A $path that does not exist (or vanishes while we work on it) counts as
# already removed. Any other failure to open or remove a directory dies.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -d follows symlinks, so rule those out first: descending through a
        # link would remove directories outside of the tree.
        next if -l $next || ! -d $next;
        rmdir_recursive($next);
    }
    closedir($dh);

    if (!rmdir($path)) {
        # Same rule as for opendir above: already gone is fine.
        return if $!{ENOENT};
        die "failed to remove directory '$path': $!\n";
    }

    return;
}
209 | } |