]>
Commit | Line | Data |
---|---|---|
deaf7667 AD |
1 | #!/usr/bin/perl |
2 | ||
b056d074 DM |
3 | package lxc_pve_prestart_hook; |
4 | ||
deaf7667 AD |
5 | use strict; |
6 | use warnings; | |
4ed2b825 | 7 | |
3bd97c10 | 8 | use Fcntl qw(O_DIRECTORY :mode); |
ce1976b8 | 9 | use File::Basename; |
deaf7667 | 10 | use File::Path; |
3bd97c10 | 11 | use POSIX; |
deaf7667 | 12 | |
36def186 | 13 | use PVE::CGroup; |
deaf7667 | 14 | use PVE::Cluster; |
0a49c44e | 15 | use PVE::LXC::Config; |
c9a5774b | 16 | use PVE::LXC::Setup; |
0a49c44e WB |
17 | use PVE::LXC::Tools; |
18 | use PVE::LXC; | |
f8dcde1b | 19 | use PVE::RESTEnvironment; |
f7073b99 | 20 | use PVE::SafeSyslog; |
0a49c44e | 21 | use PVE::Storage; |
3bd97c10 WB |
22 | use PVE::Syscall qw(:fsmount); |
23 | use PVE::Tools qw(AT_FDCWD O_PATH); | |
deaf7667 | 24 | |
e77973f4 AD |
# SDN support is optional: remember whether the Vnets module could be loaded.
# The eval block yields 1 on success and undef when the require dies, so
# $have_sdn is only true when PVE::Network::SDN::Vnets is available.
my $have_sdn = eval {
    require PVE::Network::SDN::Vnets;
    1;
};
30 | ||
c717bffb TL |
# Lazily opened handle to the per-container warnings file, shared by all
# log_warn() calls of this process.
my $WARNFD;

# Append a warning line to /run/pve/ct-<vmid>.warnings.
#
# Parameters:
#   $vmid    - container ID, used to build the warnings file name
#   $message - warning text; a trailing newline is added
#
# The file is opened (truncating) on first use and the handle is kept for later
# calls. If it cannot be opened, the message is emitted via warn() instead and
# the next call retries the open. Never dies.
#
# Returns the result of print() on success, nothing if the file is unavailable.
sub log_warn {
    my ($vmid, $message) = @_;

    if (!defined($WARNFD)) {
        my $path = "/run/pve/ct-${vmid}.warnings";

        # Open into a temporary: open() autovivifies its handle argument even
        # when it fails, which would leave $WARNFD defined but closed.
        my $fh;
        if (!open($fh, '>', $path)) {
            warn "unable to open '$path' for writing: $! (warning was: $message)\n";
            return;
        }
        $WARNFD = $fh;
    }

    return print {$WARNFD} "$message\n";
}
40 | ||
0a49c44e WB |
# Register the 'pre-start' hook for LXC containers.
#
# The callback receives the container ID ($vmid) and a hash of hook variables
# ($vars, of which only ROOTFS_PATH is used here). It:
#   - consumes the skiplock flag file and verifies quorum and config locks,
#   - removes leftover cgroups and stale state files,
#   - mounts all configured volumes (rootfs first, the rest relative to it),
#   - creates device nodes for passthrough devices on a private tmpfs,
#   - runs the distribution specific pre-start setup,
#   - writes the device lists for mounts with quota and for passthrough devices,
#   - registers DHCP mappings for veth interfaces when SDN is available.
# Any failure dies.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The skiplock flag is one-shot: remember it and remove the file.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = -e $skiplock_flag_fn;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    PVE::RESTEnvironment->setup_default_cli_env();

    # Nothing to set up when there is no config file for this vmid.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # A 'mounted' lock does not block the start; any other lock does unless
    # skiplock was requested.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # Delete the old device list file
    # in case it was left over from a previous version of pve-container.
    unlink("/var/lib/lxc/$vmid/devices");

    # Devices backing mount points with quota enabled; collected while mounting.
    my $devices = [];

    my ($id_map, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: output and errors are deliberately discarded.
    eval {
        PVE::Tools::run_command(['umount', '--recursive', $rootdir], outfunc => sub {}, errfunc => sub {});
    };

    # Set by the first (rootfs) mount; all later mounts are made relative to it.
    my $rootdir_fd = undef;
    my $setup_mountpoint = sub {
        my ($opt, $mountpoint) = @_;

        my $dir = PVE::LXC::get_staging_mount_path($opt);
        my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
            $mountpoint,
            $dir,
            $storage_cfg,
            undef,
            $root_uid,
            $root_gid,
        );

        my ($dest_dir, $dest_base_fd);
        if ($rootdir_fd) {
            # Mount relative to the rootdir fd.
            $dest_base_fd = $rootdir_fd;
            $dest_dir = './' . $mountpoint->{mp};
        } else {
            # Assert that 'rootfs' is the first one:
            die "foreach_mount() error\n" if $opt ne 'rootfs';

            # Mount the rootfs absolutely.
            # $rootdir is not controlled by the container, so this is fine.
            sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                or die "failed to open '/': $!\n";
            $dest_dir = $rootdir;
        }

        PVE::LXC::mountpoint_insert_staged(
            $mount_fd,
            $dest_base_fd,
            $dest_dir,
            $opt,
            $root_uid,
            $root_gid,
        );

        # From now on we mount inside our rootfs:
        if (!$rootdir_fd) {
            $rootdir_fd = $mount_fd;
        }

        push @$devices, $dev if $dev && $mountpoint->{quota};
    };

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    # Device passthrough
    # Entries are [ $host_path, $mode, $rdev ] for every node created below.
    my $passthrough_devices = [];

    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
    File::Path::make_path($passthrough_dir);
    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k")
        or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!");

    my $setup_passthrough_device = sub {
        my ($key, $device) = @_;

        my $absolute_path = $device->{path};
        my ($mode, $rdev) = (stat($absolute_path))[2, 6];

        die "Could not get mode or device ID of $absolute_path\n"
            if (!defined($mode) || !defined($rdev));

        # Recreate the node below the passthrough tmpfs under the same path.
        my $passthrough_device_path = $passthrough_dir . $absolute_path;
        File::Path::make_path(dirname($passthrough_device_path));
        PVE::Tools::mknod($passthrough_device_path, $mode, $rdev)
            or die("failed to mknod $passthrough_device_path: $!\n");

        # Use chmod because umask could mess with the access mode on mknod
        my $passthrough_mode = 0660;
        $passthrough_mode = oct($device->{mode}) if defined($device->{mode});
        chmod $passthrough_mode, $passthrough_device_path
            or die "failed to chmod $passthrough_mode $passthrough_device_path: $!\n";

        # Set uid and gid of the device node
        # (configured IDs are container IDs and get mapped to host IDs)
        my $uid = 0;
        my $gid = 0;
        $uid = $device->{uid} if defined($device->{uid});
        $gid = $device->{gid} if defined($device->{gid});
        $uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map);
        $gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map);
        chown $uid, $gid, $passthrough_device_path
            or die("failed to chown $uid:$gid $passthrough_device_path: $!\n");

        push @$passthrough_devices, [$absolute_path, $mode, $rdev];
    };

    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
                ." environment! Please see documentation -> container -> cgroup version.");
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    # One "b:<major>:<minor>:<path>" line per block device backing a quota mount.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mounts", $devlist);
    }

    # One "<b|c>:<major>:<minor>:<path>" line per passthrough device.
    if (@$passthrough_devices) {
        my $devlist = '';
        for my $dev (@$passthrough_devices) {
            my ($path, $mode, $rdev) = @$dev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
            $devlist .= "$device_type_char:$major:$minor:$path\n";
        }
        PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist);
    }

    # Register a DHCP mapping for every veth interface when SDN is installed.
    if ($have_sdn) {
        for my $k (keys %$conf) {
            next if $k !~ /^net(\d+)/;
            my $net = PVE::LXC::Config->parse_lxc_network($conf->{$k});
            next if $net->{type} ne 'veth';
            PVE::Network::SDN::Vnets::add_dhcp_mapping($net->{bridge}, $net->{hwaddr}, $vmid, $conf->{hostname});
        }
    }
});
51ae28ec WB |
217 | |
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# Removes the 'lxc/<vmid>' and 'lxc.monitor/<vmid>' cgroup directories of the
# given container below /sys/fs/cgroup: directly in a pure cgroupv2 setup,
# otherwise once per cgroupv1 controller plus the 'unified' hierarchy if
# present.
#
# Parameters:
#   $vmid - container ID
#
# Dies if a directory cannot be removed (see rmdir_recursive).
#
# Deliberately declared without a prototype: the call site precedes this
# definition, so a prototype could not be checked there and would only cause a
# "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        foreach my $controller (keys %$v1) {
            # `name=systemd` is mounted just as `systemd`
            (my $mount_name = $controller) =~ s/^name=//;
            rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc.monitor/$vmid");
        }

        if ($v2) {
            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
        }
    }
}
243 | ||
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path and all directories below it, depth first.
# Only directories are removed; any other entry is left alone, so the final
# rmdir() fails if such entries remain.
#
# Parameters:
#   $path - directory to remove
#
# A directory that does not exist (or vanishes while we work on it) is not an
# error. Symbolic links are never followed. Dies on any other failure.
#
# Returns 1 after removing $path, nothing if it was already gone.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -d follows symlinks, so check for a link first: we must never
        # descend into (and empty) a directory outside of $path.
        next if -l $next || ! -d $next;
        rmdir_recursive($next);
    }

    closedir($dh);

    if (!rmdir($path)) {
        # Somebody else removed it in the meantime, which is what we wanted.
        return if $!{ENOENT};
        die "failed to remove directory '$path': $!\n";
    }

    return 1;
}