use strict;
use warnings;
-exit 0 if $ENV{LXC_NAME} && $ENV{LXC_NAME} !~ /^\d+$/;
-
-use POSIX;
+use Fcntl qw(O_DIRECTORY :mode);
use File::Path;
+use POSIX;
-use PVE::SafeSyslog;
-use PVE::Tools;
+use PVE::CGroup;
use PVE::Cluster;
-use PVE::INotify;
-use PVE::RPCEnvironment;
-use PVE::JSONSchema qw(get_standard_option);
-use PVE::CLIHandler;
-use PVE::Storage;
+use PVE::LXC::Config;
+use PVE::LXC::Setup;
+use PVE::LXC::Tools;
use PVE::LXC;
-use Data::Dumper;
-
-use base qw(PVE::CLIHandler);
-
-__PACKAGE__->register_method ({
- name => 'lxc-pve-prestart-hook',
- path => 'lxc-pve-prestart-hook',
- method => 'GET',
- description => "Create a new container root directory.",
- parameters => {
- additionalProperties => 0,
- properties => {
- name => {
- description => "The container name. This hook is only active for containers using numeric IDs, where configuration is stored on /etc/pve/lxc/<name>.conf (else it is just a NOP).",
- type => 'string',
- pattern => '\S+',
- maxLength => 64,
- },
- path => {
- description => "The path to the container configuration directory (LXC internal argument - do not pass manually!).",
- type => 'string',
- },
- rootfs => {
- description => "The path to the container's rootfs (LXC internal argument - do not pass manually!)",
- type => 'string',
- },
- },
- },
- returns => { type => 'null' },
-
- code => sub {
- my ($param) = @_;
-
- return undef if $param->{name} !~ m/^\d+$/;
-
- my $rootdir = $ENV{LXC_ROOTFS_MOUNT};
-
- my $vmid = $param->{name};
-
- PVE::Cluster::check_cfs_quorum(); # only start if we have quorum
-
- return undef if ! -f PVE::LXC::config_file($vmid);
-
- my $conf = PVE::LXC::load_config($vmid);
-
- my $storage_cfg = PVE::Storage::config();
-
- my $vollist = PVE::LXC::get_vm_volumes($conf);
- my $loopdevlist = PVE::LXC::get_vm_volumes($conf, 'rootfs');
-
- PVE::Storage::activate_volumes($storage_cfg, $vollist);
- return undef;
- }});
-
-
-push @ARGV, 'help' if !scalar(@ARGV);
-
-my $param = {};
+use PVE::SafeSyslog;
+use PVE::Storage;
+use PVE::Syscall qw(:fsmount);
+use PVE::Tools qw(AT_FDCWD O_PATH);
-if ((scalar(@ARGV) == 3) && ($ARGV[1] eq 'lxc') && ($ARGV[2] eq 'pre-start')) {
- $param->{name} = $ENV{'LXC_NAME'};
- die "got wrong name" if $param->{name} ne $ARGV[0];
+# Lazily opened handle for the per-container warnings file; set by log_warn()
+# on the first successful open and then reused for later messages.
+my $WARNFD;
+
+# Record a warning for container $vmid.
+#
+# The message is written (newline-terminated) to /run/pve/ct-<vmid>.warnings.
+# The file is opened for writing on the first call, which truncates it.
+# If the file cannot be opened the problem is reported on STDERR and the
+# message itself is emitted there as well, so it is not silently lost; the
+# open is retried on the next call. Never dies.
+sub log_warn {
+    my ($vmid, $message) = @_;
- $param->{path} = $ENV{'LXC_CONFIG_FILE'};
- $param->{rootfs} = $ENV{'LXC_ROOTFS_PATH'};
- @ARGV = ();
-} else {
- @ARGV = ('help');
+    if (!defined($WARNFD)) {
+        my $warn_file = "/run/pve/ct-${vmid}.warnings";
+        # Open into a temporary so $WARNFD only ever holds a usable handle.
+        if (open(my $fh, '>', $warn_file)) {
+            $WARNFD = $fh;
+        } else {
+            warn "failed to open '$warn_file': $!\n";
+        }
+    }
+
+    if (defined($WARNFD)) {
+        print {$WARNFD} "$message\n";
+    } else {
+        # Fallback so the warning still shows up somewhere.
+        warn "$message\n";
+    }
}
-our $cmddef = [ __PACKAGE__, 'lxc-pve-prestart-hook', [], $param];
+# LXC 'pre-start' hook body, run via PVE::LXC::Tools::lxc_hook().
+#
+# The callback receives the container ID ($vmid) and a hash of hook variables
+# ($vars, of which only ROOTFS_PATH is used here). In order, it:
+#   - consumes the one-shot skiplock flag file /run/lxc/skiplock-<vmid>,
+#   - requires cluster quorum and an existing container config,
+#   - checks the config lock (unless skiplock is set or the config holds the
+#     'mounted' lock),
+#   - removes leftover cgroups, the reboot trigger and the old device list,
+#   - mounts every configured volume below the rootfs (legacy mount or
+#     staged mounts, depending on can_use_new_mount_api()),
+#   - runs the distribution specific pre-start setup,
+#   - writes the list of quota-enabled block devices to
+#     /var/lib/lxc/<vmid>/devices.
+# Errors are raised with die().
+PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
+    my ($vmid, $vars, undef, undef) = @_;
+
+    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
+    # Compute the flag explicitly: `my $x = 1 if COND` has undefined
+    # behaviour in Perl (see perlsyn, "Statement Modifiers").
+    my $skiplock = (-e $skiplock_flag_fn) ? 1 : 0;
+    unlink $skiplock_flag_fn if $skiplock;
+
+    PVE::Cluster::check_cfs_quorum(); # only start if we have quorum
+
+    return undef if ! -f PVE::LXC::Config->config_file($vmid);
+
+    my $conf = PVE::LXC::Config->load_config($vmid);
+    if (!$skiplock && !PVE::LXC::Config->has_lock($conf, 'mounted')) {
+        PVE::LXC::Config->check_lock($conf);
+    }
+
+    cleanup_cgroups($vmid);
+
+    my $storage_cfg = PVE::Storage::config();
+
+    my $rootdir = $vars->{ROOTFS_PATH};
+
+    # Delete any leftover reboot-trigger file
+    unlink("/var/lib/lxc/$vmid/reboot");
+
+    # The device list is rebuilt from scratch at the end of this hook.
+    my $devlist_file = "/var/lib/lxc/$vmid/devices";
+    unlink $devlist_file;
+    my $devices = [];
+
+    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+
+    # Unmount first when the user mounted the container with "pct mount".
+    # Best effort: output is discarded and a failing umount is ignored.
+    eval {
+        PVE::Tools::run_command(['umount', '--recursive', $rootdir], outfunc => sub {}, errfunc => sub {});
+    };
+
+    # Callback invoked for every volume as ($opt, $mountpoint).
+    my $setup_mountpoint;
+    if (!PVE::LXC::Tools::can_use_new_mount_api()) {
+        # Legacy mode for old kernels:
+        $setup_mountpoint = sub {
+            my ($opt, $mountpoint) = @_;
+
+            my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
+                $mountpoint,
+                $rootdir,
+                $storage_cfg,
+                undef,
+                $rootuid,
+                $rootgid,
+            );
+            push @$devices, $dev if $dev && $mountpoint->{quota};
+        };
+    } else {
+        # With newer kernels we stage mount points and then use move_mount().
+        my $rootdir_fd = undef;
+        $setup_mountpoint = sub {
+            my ($opt, $mountpoint) = @_;
+
+            my $dir = PVE::LXC::get_staging_mount_path($opt);
+            my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
+                $mountpoint,
+                $dir,
+                $storage_cfg,
+                undef,
+                $rootuid,
+                $rootgid,
+            );
+
+            my ($dest_dir, $dest_base_fd);
+            if ($rootdir_fd) {
+                # Mount relative to the rootdir fd.
+                $dest_base_fd = $rootdir_fd;
+                $dest_dir = './' . $mountpoint->{mp};
+            } else {
+                # Assert that 'rootfs' is the first one:
+                die "foreach_mount() error\n" if $opt ne 'rootfs';
+
+                # Mount the rootfs absolutely.
+                # $rootdir is not controlled by the container, so this is fine.
+                sysopen($dest_base_fd, '/', O_PATH | O_DIRECTORY)
+                    or die "failed to open '/': $!\n";
+                $dest_dir = $rootdir;
+            }
+
+            PVE::LXC::mountpoint_insert_staged(
+                $mount_fd,
+                $dest_base_fd,
+                $dest_dir,
+                $opt,
+                $rootuid,
+                $rootgid,
+            );
+
+            # From now on we mount inside our rootfs:
+            if (!$rootdir_fd) {
+                $rootdir_fd = $mount_fd;
+            }
+
+            push @$devices, $dev if $dev && $mountpoint->{quota};
+        };
+    }
+
+    PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);
+
+    my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
+    $lxc_setup->pre_start_hook();
+
+    # Only warn (do not abort) when the guest cannot cope with pure cgroupv2.
+    if (PVE::CGroup::cgroup_mode() == 2) {
+        if (!$lxc_setup->unified_cgroupv2_support()) {
+            log_warn($vmid, "old systemd (< v232) detected, container won't run in a pure cgroupv2"
+                ." environment! Please see documentation -> container -> cgroup version.");
+            syslog('err', "CT $vmid does not support running in a pure cgroupv2 environment\n");
+        }
+    }
+
+    if (@$devices) {
+        # One "b:<major>:<minor>:<path>" line per block device; entries that
+        # cannot be stat()ed or are not block devices are skipped.
+        my $devlist = '';
+        foreach my $dev (@$devices) {
+            my ($mode, $rdev) = (stat($dev))[2,6];
+            next if !$mode || !S_ISBLK($mode) || !$rdev;
+            my $major = PVE::Tools::dev_t_major($rdev);
+            my $minor = PVE::Tools::dev_t_minor($rdev);
+            $devlist .= "b:$major:$minor:$dev\n";
+        }
+        PVE::Tools::file_set_contents($devlist_file, $devlist);
+    }
+});
+
+# Leftover cgroups prevent lxc from starting without any useful information
+# showing up in the journal, it is also often unable to properly clean them up
+# at shutdown, so we do this here.
+#
+# Removes the 'lxc/<vmid>' and 'lxc.monitor/<vmid>' cgroup directories of
+# container $vmid: directly below /sys/fs/cgroup in cgroup mode 2, otherwise
+# below every v1 controller reported by PVE::CGroup::get_cgroup_controllers()
+# and, if a v2 hierarchy is reported too, below /sys/fs/cgroup/unified.
+# Dies if a directory exists but cannot be removed (see rmdir_recursive()).
+#
+# Deliberately declared without a prototype: this sub is called before its
+# definition is compiled, where a prototype would not be applied and would
+# only trigger a "called too early to check prototype" warning.
+sub cleanup_cgroups {
+    my ($vmid) = @_;
+
+    if (PVE::CGroup::cgroup_mode() == 2) {
+        rmdir_recursive("/sys/fs/cgroup/lxc/$vmid");
+        rmdir_recursive("/sys/fs/cgroup/lxc.monitor/$vmid");
+    } else {
+        my ($v1, $v2) = PVE::CGroup::get_cgroup_controllers();
+
+        foreach my $controller (keys %$v1) {
+            # `name=systemd` is mounted just as `systemd`
+            (my $mount_name = $controller) =~ s/^name=//;
+            rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc/$vmid");
+            rmdir_recursive("/sys/fs/cgroup/$mount_name/lxc.monitor/$vmid");
+        }
+
+        if ($v2) {
+            rmdir_recursive("/sys/fs/cgroup/unified/lxc/$vmid");
+            rmdir_recursive("/sys/fs/cgroup/unified/lxc.monitor/$vmid");
+        }
+    }
+}
-__PACKAGE__->run_cli_handler();
+# FIXME: This is an ugly version without openat() because perl has no equivalent
+# of fdopendir() so we cannot readdir from an openat() opened handle.
+#
+# Remove the directory $path after first removing all of its sub-directories
+# (depth first). Entries that are not directories are left alone. A $path
+# that does not exist is not an error; any other failure to open or remove a
+# directory is fatal.
+sub rmdir_recursive {
+    my ($path) = @_;
+
+    opendir(my $dh, $path) or do {
+        # Nothing to clean up if the directory is already gone.
+        return if $!{ENOENT};
+        die "failed to open directory '$path': $!\n";
+    };
+
+    while (defined(my $name = readdir($dh))) {
+        next if $name eq '.' || $name eq '..';
+        my $child = "$path/$name";
+        rmdir_recursive($child) if -d $child;
+    }
+
+    rmdir($path) or die "failed to remove directory '$path': $!\n";
+}