#!/usr/bin/perl

package lxc_pve_prestart_hook;

use strict;
use warnings;

use Fcntl qw(O_DIRECTORY :mode);
use File::Path;
use POSIX;

use PVE::Cluster;
use PVE::LXC::Config;
use PVE::LXC::Setup;
use PVE::LXC::Tools;
use PVE::LXC;
use PVE::Storage;
use PVE::Syscall qw(:fsmount);
use PVE::Tools qw(AT_FDCWD O_PATH);

# LXC 'pre-start' hook.
#
# In order, the callback:
#   - consumes the one-shot /run/lxc/skiplock-$vmid flag file,
#   - checks cluster quorum and (unless skipped) the container config lock,
#   - removes leftover cgroup directories of the container,
#   - mounts every volume of the container below $vars->{ROOTFS_PATH},
#   - runs the pre_start_hook() of PVE::LXC::Setup,
#   - writes the block devices of quota-enabled mount points to
#     /var/lib/lxc/$vmid/devices.
#
# It returns early (without doing anything else) when the container has no
# config file.
PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
    my ($vmid, $vars, undef, undef) = @_;

    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
    # Declared unconditionally: `my $x = ... if COND;` has undefined behaviour
    # according to perlsyn and must not be used.
    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
    unlink $skiplock_flag_fn if $skiplock;

    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum

    return undef if ! -f PVE::LXC::Config->config_file($vmid);

    my $conf = PVE::LXC::Config->load_config($vmid);
    # A 'mounted' lock does not block the start; any other lock does, unless
    # the skiplock flag was set.
    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
        PVE::LXC::Config->check_lock($conf);
    }

    cleanup_cgroups($vmid);

    my $storage_cfg = PVE::Storage::config();

    my $rootdir = $vars->{ROOTFS_PATH};

    # Delete any leftover reboot-trigger file
    unlink("/var/lib/lxc/$vmid/reboot");

    my $devlist_file = "/var/lib/lxc/$vmid/devices";
    unlink $devlist_file;
    my $devices = [];

    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);

    # Unmount first when the user mounted the container with "pct mount".
    # Best effort: output and errors are deliberately discarded.
    eval {
        PVE::Tools::run_command(
            ['umount', '--recursive', $rootdir],
            outfunc => sub {},
            errfunc => sub {},
        );
    };

    my $setup_mountpoint;
    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
        # Legacy mode for old kernels:
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
                $mountpoint,
                $rootdir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );
            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    } else {
        # With newer kernels we stage mount points and then use move_mount().

        # Set after the first (rootfs) mount; later mounts are made relative
        # to it.
        my $rootdir_fd = undef;
        $setup_mountpoint = sub {
            my ($opt, $mountpoint) = @_;

            my $dir = PVE::LXC::get_staging_mount_path($opt);
            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
                $mountpoint,
                $dir,
                $storage_cfg,
                undef,
                $rootuid,
                $rootgid,
            );

            my ($dest_dir, $dest_base_fd);
            if ($rootdir_fd) {
                # Mount relative to the rootdir fd.
                $dest_base_fd = $rootdir_fd;
                $dest_dir = './' . $mountpoint->{mp};
            } else {
                # Assert that 'rootfs' is the first one:
                die "foreach_mount() error\n" if $opt ne 'rootfs';

                # Mount the rootfs absolutely.
                # $rootdir is not controlled by the container, so this is fine.
                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
                    or die "failed to open '/': $!\n";
                $dest_dir = $rootdir;
            }

            PVE::LXC::mountpoint_insert_staged(
                $mount_fd,
                $dest_base_fd,
                $dest_dir,
                $opt,
                $rootuid,
                $rootgid,
            );

            # From now on we mount inside our rootfs:
            if (!$rootdir_fd) {
                $rootdir_fd = $mount_fd;
            }

            push @$devices, $dev if $dev && $mountpoint->{quota};
        };
    }

    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);

    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
    $lxc_setup->pre_start_hook();

    if (@$devices) {
        my $devlist = '';
        foreach my $dev (@$devices) {
            # stat fields 2 and 6 are the file mode and the device number.
            my ($mode, $rdev) = (stat($dev))[2,6];
            # Only block devices with a usable device number are recorded.
            next if !$mode || !S_ISBLK($mode) || !$rdev;
            my $major = PVE::Tools::dev_t_major($rdev);
            my $minor = PVE::Tools::dev_t_minor($rdev);
            $devlist .= "b:$major:$minor:$dev\n";
        }
        PVE::Tools::file_set_contents($devlist_file, $devlist);
    }
});

# Leftover cgroups prevent lxc from starting without any useful information
# showing up in the journal, it is also often unable to properly clean them up
# at shutdown, so we do this here.
# Remove the 'lxc' and 'lxc.monitor' cgroup directories of container $vmid.
#
# In cgroup v2 mode the directories live directly below /sys/fs/cgroup.
# Otherwise one pair is removed for every v1 controller returned by
# PVE::LXC::get_cgroup_subsystems(), plus the pair below 'unified' when a v2
# hierarchy is reported as well.
#
# Dies (via rmdir_recursive) when a directory cannot be opened or removed.
#
# No prototype on purpose: the only call site is compiled before this
# definition, so a prototype would never be applied and would only produce a
# "called too early to check prototype" warning.
sub cleanup_cgroups {
    my ($vmid) = @_;

    if (PVE::CGroup::cgroup_mode() == 2) {
        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
    } else {
        my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();

        my @controllers_cgv1 = keys %$v1;
        foreach my $controller (@controllers_cgv1) {
            $controller =~ s/^name=//; # `name=systemd` is mounted just as `systemd`
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/$controller/lxc.monitor/$vmid");
        }

        if ($v2) {
            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
        }
    }
}

# FIXME: This is an ugly version without openat() because perl has no equivalent
# of fdopendir() so we cannot readdir from an openat() opened handle.
#
# Remove the directory $path together with all directories below it.
#
# Only rmdir() is used, so nothing but directories is ever deleted; an entry
# that is not a directory is left alone and makes the rmdir() of its parent
# fail. Symbolic links are never followed. A $path that does not exist is
# silently ignored.
#
# Dies when $path cannot be opened (for a reason other than ENOENT) or when a
# directory cannot be removed.
sub rmdir_recursive {
    my ($path) = @_;

    my $dh;
    if (!opendir($dh, $path)) {
        return if $!{ENOENT};
        die "failed to open directory '$path': $!\n";
    }

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        my $next = "$path/$entry";
        # -l performs an lstat(), whose result `-d _` reuses. A plain -d would
        # follow symlinks and make us descend into (and empty) directories
        # outside of the tree we were asked to remove.
        next if -l $next || !-d _;
        rmdir_recursive($next);
    }
    closedir($dh);

    rmdir($path) or die "failed to remove directory '$path': $!\n";
}