git.proxmox.com Git - pve-container.git/blob - src/lxc-pve-prestart-hook
sdn: pass vmid and hostname to add_dhcp_mapping
[pve-container.git] / src / lxc-pve-prestart-hook
1 #!/usr/bin/perl
2
3 package lxc_pve_prestart_hook;
4
5 use strict;
6 use warnings;
7
8 use Fcntl qw(O_DIRECTORY :mode);
9 use File::Basename;
10 use File::Path;
11 use POSIX;
12
13 use PVE::CGroup;
14 use PVE::Cluster;
15 use PVE::LXC::Config;
16 use PVE::LXC::Setup;
17 use PVE::LXC::Tools;
18 use PVE::LXC;
19 use PVE::RESTEnvironment;
20 use PVE::SafeSyslog;
21 use PVE::Storage;
22 use PVE::Syscall qw(:fsmount);
23 use PVE::Tools qw(AT_FDCWD O_PATH);
24
# Probe for the optional SDN module at load time. $have_sdn ends up as 1 when
# PVE::Network::SDN::Vnets could be loaded and stays undefined otherwise; a
# failing require is swallowed by the eval on purpose.
my $have_sdn = eval {
    require PVE::Network::SDN::Vnets;
    1;
};
30
# Lazily opened handle for the per-container warnings file. It is only set
# once the file has been opened successfully.
my $WARNFD;

# Record a warning message for container $vmid.
#
# The message is appended as one line to "/run/pve/ct-$vmid.warnings", which is
# opened (and truncated) on first use and then kept open for later calls.
#
# If the file cannot be opened the failure is reported via warn() and the
# message itself is emitted via warn() as well, so that it is not lost by
# printing to a closed handle. Logging is best effort: this never dies.
#
# Parameters:
#   $vmid    - container ID, used to build the warnings file name
#   $message - text to record (a trailing newline is added)
sub log_warn {
    my ($vmid, $message) = @_;

    if (!defined($WARNFD)) {
        my $path = "/run/pve/ct-${vmid}.warnings";
        # Open into a temporary lexical: open() would autovivify $WARNFD even
        # on failure, which would make the defined() check above useless.
        if (open(my $fh, '>', $path)) {
            $WARNFD = $fh;
        } else {
            warn "failed to open '$path' for writing: $!\n";
        }
    }

    if (defined($WARNFD)) {
        print {$WARNFD} "$message\n";
    } else {
        warn "$message\n";
    }
}
40
# Register the 'pre-start' hook for the 'lxc' section.
#
# The callback receives the container ID ($vmid) and the hook variables
# ($vars, of which only ROOTFS_PATH is used here). In order it:
#   - consumes the skiplock flag file and checks quorum and config locks,
#   - removes leftover cgroups and stale state files,
#   - stages and mounts all volumes, rootfs first,
#   - creates the passthrough device nodes on a small tmpfs,
#   - runs the distribution specific pre-start setup,
#   - writes the device lists below /var/lib/lxc/$vmid/passthrough,
#   - registers DHCP mappings for veth interfaces when SDN is available.
# Any failure dies and is propagated to the caller of the hook.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    # The skiplock flag is a one-shot marker file: remember it, then remove it.
    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    my $skiplock = -e $skiplock_flag_fn;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    PVE::RESTEnvironment->setup_default_cli_env();

    # Nothing to do when there is no config file for this ID.
    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # The lock check is skipped with skiplock and when the config holds the
    # 'mounted' lock.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    # Delete the old device list file
    # in case it was left over from a previous version of pve-container.
    unlink("/var/lib/lxc/$vmid/devices");

    # Devices backing quota-enabled mount points, filled by $setup_mountpoint.
    my $devices = [];

    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: output is discarded and failures are ignored.
    eval {
        PVE::Tools::run_command(['umount', '--recursive', $rootdir], outfunc => sub {}, errfunc => sub {});
    };

    # Stays undef until the rootfs has been mounted; afterwards all further
    # mount points are inserted relative to it.
    my $rootdir_fd = undef;
    my $setup_mountpoint = sub {
        my ($opt, $mountpoint) = @_;

        my $dir = PVE::LXC::get_staging_mount_path($opt);
        my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
            $mountpoint,
            $dir,
            $storage_cfg,
            undef,
            $rootuid,
            $rootgid,
        );

        my ($dest_dir, $dest_base_fd);
        if ($rootdir_fd) {
            # Mount relative to the rootdir fd.
            $dest_base_fd = $rootdir_fd;
            $dest_dir = './' . $mountpoint->{mp};
        } else {
            # Assert that 'rootfs' is the first one:
            die "foreach_mount() error\n" if $opt ne 'rootfs';

            # Mount the rootfs absolutely.
            # $rootdir is not controlled by the container, so this is fine.
            sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                or die "failed to open '/': $!\n";
            $dest_dir = $rootdir;
        }

        PVE::LXC::mountpoint_insert_staged(
            $mount_fd,
            $dest_base_fd,
            $dest_dir,
            $opt,
            $rootuid,
            $rootgid,
        );

        # From now on we mount inside our rootfs:
        if (!$rootdir_fd) {
            $rootdir_fd = $mount_fd;
        }

        push @$devices, $dev if $dev && $mountpoint->{quota};
    };

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    # Device passthrough
    # Entries are [ $absolute_path, $mode, $rdev ] triples.
    my $passthrough_devices = [];

    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
    File::Path::make_path($passthrough_dir);
    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k")
        or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!");

    my $setup_passthrough_device = sub {
        my ($key, $device) = @_;

        my $absolute_path = $device->{path};
        # Fields 2 and 6 of stat() are the file mode and the device ID (rdev).
        my ($mode, $rdev) = (stat($absolute_path))[2, 6];

        die "Could not get mode or device ID of $absolute_path\n"
            if (!defined($mode) || !defined($rdev));

        # Recreate the node below the passthrough tmpfs under the same path.
        my $passthrough_device_path = $passthrough_dir . $absolute_path;
        File::Path::make_path(dirname($passthrough_device_path));
        PVE::Tools::mknod($passthrough_device_path, $mode, $rdev)
            or die("failed to mknod $passthrough_device_path: $!\n");

        # Use chmod because umask could mess with the access mode on mknod
        my $passthrough_mode = 0660;
        $passthrough_mode = oct($device->{mode}) if defined($device->{mode});
        chmod $passthrough_mode, $passthrough_device_path
            or die "failed to chmod $passthrough_mode $passthrough_device_path: $!\n";

        # Set uid and gid of the device node
        my $uid = 0;
        my $gid = 0;
        $uid = $device->{uid} if defined($device->{uid});
        $gid = $device->{gid} if defined($device->{gid});
        # The configured IDs are container-side; translate them via the ID map.
        $uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map);
        $gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map);
        chown $uid, $gid, $passthrough_device_path
            or die("failed to chown $uid:$gid $passthrough_device_path: $!\n");

        push @$passthrough_devices, [$absolute_path, $mode, $rdev];
    };

    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    if (PVE::CGroup::cgroup_mode() == 2) {
        if (!$lxc_setup->unified_cgroupv2_support()) {
            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
                ." environment! Please see documentation -> container -> cgroup version.");
            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
        }
    }

    # Write one "b:<major>:<minor>:<path>" line per block device that backs a
    # quota-enabled mount point.
    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            my ($mode, $rdev) = (stat($dev))[2,6];
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mounts", $devlist);
    }

    # Same format for the passthrough devices, with 'b' or 'c' as the type.
    if (@$passthrough_devices) {
        my $devlist = '';
        for my $dev (@$passthrough_devices) {
            my ($path, $mode, $rdev) = @$dev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
            $devlist .= "$device_type_char:$major:$minor:$path\n";
        }
        PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist);
    }

    if ($have_sdn) {
        for my $k (keys %$conf) {
            next if $k !~ /^net(\d+)/;
            my $net = PVE::LXC::Config->parse_lxc_network($conf->{$k});
            next if $net->{type} ne 'veth';
            # Pass the hook's own $vmid: nothing in this hook stores the ID
            # under a 'vmid' key of $conf, so $conf->{vmid} cannot be relied on.
            PVE::Network::SDN::Vnets::add_dhcp_mapping($net->{bridge}, $net->{hwaddr}, $vmid, $conf->{hostname});
        }
    }
});
217
# Remove leftover cgroup directories of container $vmid.
#
# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal. lxc is also often unable to properly clean them up
# at shutdown, so we do this here.
#
# In cgroup mode 2 the 'lxc' and 'lxc.monitor' directories are removed directly
# below /sys/fs/cgroup. Otherwise they are removed for every v1 controller
# and, if a v2 hierarchy is reported, below /sys/fs/cgroup/unified as well.
#
# Dies if a directory exists but cannot be removed (see rmdir_recursive).
#
# Note: this sub intentionally has no prototype. Its only call site is compiled
# before this definition, so a prototype would never be applied to it.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
    } else {
        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();

        my @controllers_cgv1 = keys %$v1;
        foreach my $controller (@controllers_cgv1) {
            $controller =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc.monitor/$vmid");
        }

        if ($v2) {
            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
        }
    }
}
243
# Remove the directory $path together with all directories below it.
#
# Only directories are removed (via rmdir); other entries are left alone, so
# the call dies if a directory still contains something rmdir cannot handle.
# A $path that does not exist (or vanishes while we work on it) is treated as
# already removed. Symbolic links are never followed, so nothing outside of
# $path is touched.
#
# Returns 1 after removing $path, nothing if $path did not exist.
# Dies with "failed to open directory ..." or "failed to remove directory ..."
# on any other error.
#
# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    # Collect the sub directories first so the handle can be closed before
    # recursing instead of keeping one open handle per directory level.
    my @subdirs;
    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -l does an lstat(); '-d _' reuses that result, so a symlink pointing
        # to a directory is skipped rather than descended into.
        next if -l $next || ! -d _;
        push @subdirs, $next;
    }
    closedir($dh);

    rmdir_recursive($_) for @subdirs;

    if (!rmdir($path)) {
        # Somebody else removed it in the meantime, which is fine.
        return if $!{ENOENT};
        die "failed to remove directory '$path': $!\n";
    }

    return 1;
}