+# Replay the captured stderr of a container start on our own STDERR.
+#
+# Reads /run/pve/ct-$vmid.stderr and prints it line by line, with surrounding
+# horizontal whitespace and an optional leading "lxc-start <vmid> <token>"
+# prefix stripped. Returns silently if the file cannot be read or is empty.
+my sub print_ct_stderr_log {
+    my ($vmid) = @_;
+
+    my $content = eval { file_get_contents("/run/pve/ct-$vmid.stderr") };
+    return if !$content;
+
+    # /m anchors the pattern per line, /g walks over every line of the log;
+    # the second capture group holds the text without the lxc-start prefix
+    while ($content =~ /^\h*(lxc-start:?\s+$vmid:?\s*\S+\s*)?(.*?)\h*$/gm) {
+        print STDERR "$2\n";
+    }
+}
+# Forward the warnings recorded in /run/pve/ct-$vmid.warnings.
+#
+# Each match of the line pattern is handed to PVE::RESTEnvironment::log_warn()
+# with leading whitespace and trailing horizontal whitespace removed. The file
+# is unlinked afterwards; a failing unlink only produces a warning. Returns
+# early if the file cannot be read or is empty.
+my sub print_ct_warn_log {
+    my ($vmid) = @_;
+
+    my $warn_file = "/run/pve/ct-$vmid.warnings";
+    my $content = eval { file_get_contents($warn_file) };
+    return if !$content;
+
+    while ($content =~ /^\h*\s*(.*?)\h*$/gm) {
+        my $message = $1;
+        PVE::RESTEnvironment::log_warn($message);
+    }
+
+    unlink($warn_file) or warn "could not unlink '$warn_file' - $!\n";
+}
+
+# Read messages from the LXC monitor socket until container $vmid reports a
+# decisive state.
+#
+# Returns 1 once the RUNNING state is seen and 0 for ABORTING, STOPPING or
+# STOPPED. Dies if no socket was passed or if reading yields no message type
+# (EOF). Messages for other names or of a type other than 'STATE' are skipped;
+# any other state value only triggers a warning.
+my sub monitor_state_change($$) {
+    my ($monitor_socket, $vmid) = @_;
+    die "no monitor socket\n" if !defined($monitor_socket);
+
+    while (1) {
+        my ($type, $name, $value) = PVE::LXC::Monitor::read_lxc_message($monitor_socket);
+        die "monitor socket: got EOF\n" if !defined($type);
+
+        # only state changes of our own container are of interest
+        next if !($name eq "$vmid" && $type eq 'STATE');
+
+        return 1 if $value eq PVE::LXC::Monitor::STATE_RUNNING;
+
+        return 0
+            if $value eq PVE::LXC::Monitor::STATE_ABORTING
+            || $value eq PVE::LXC::Monitor::STATE_STOPPING
+            || $value eq PVE::LXC::Monitor::STATE_STOPPED;
+
+        if ($value eq PVE::LXC::Monitor::STATE_STARTING) {
+            alarm(0); # don't timeout after seeing the starting state
+            next;
+        }
+
+        warn "unexpected message from monitor socket - " .
+            "type: '$type' - value: '$value'\n";
+    }
+}
+# Wait (with a 10 second timeout handed to run_with_timeout) for the monitor to
+# report the outcome of the start of container $vmid.
+#
+# Problems with the monitor socket itself are only warned about. If the
+# monitor reports that the container did not reach the running state, the
+# container's stderr log is printed and the sub dies.
+my sub monitor_start($$) {
+    my ($monitor_socket, $vmid) = @_;
+
+    my $started = eval {
+        PVE::Tools::run_with_timeout(10, \&monitor_state_change, $monitor_socket, $vmid)
+    };
+    my $err = $@;
+
+    if ($err) {
+        warn "problem with monitor socket, but continuing anyway: $err\n";
+        return;
+    }
+
+    return if $started;
+
+    print_ct_stderr_log($vmid);
+    die "startup for container '$vmid' failed\n";
+}
+
+# Start container $vmid through its systemd unit.
+#
+# Parameters:
+#   $vmid     - ID of the container
+#   $conf     - container config hash; written and re-loaded when pending
+#               changes get applied
+#   $skiplock - if true, the flag file /run/lxc/skiplock-$vmid is created
+#               before the container is started
+#   $debug    - if true, use the 'pve-container-debug' unit and print the
+#               stderr log even on success; if undefined, $conf->{debug}
+#               decides
+#
+# Any error raised after the skiplock flag file was created is re-thrown once
+# the flag file has been removed again, so that a failed start never leaves a
+# stale flag behind. The 'post-start' hookscript only runs after a successful
+# start.
+sub vm_start {
+    my ($vmid, $conf, $skiplock, $debug) = @_;
+
+    # apply pending changes while starting
+    if (scalar(keys %{$conf->{pending}})) {
+        my $storecfg = PVE::Storage::config();
+        PVE::LXC::Config->vmconfig_apply_pending($vmid, $conf, $storecfg);
+        PVE::LXC::Config->write_config($vmid, $conf);
+        $conf = PVE::LXC::Config->load_config($vmid); # update/reload
+    }
+
+    update_lxc_config($vmid, $conf);
+
+    # invalid id maps are only reported, they do not abort the start here
+    eval {
+        my ($id_map, undef, undef) = PVE::LXC::parse_id_maps($conf);
+        PVE::LXC::validate_id_maps($id_map);
+    };
+    warn "lxc.idmap: $@" if $@;
+
+    my $skiplock_flag_fn = "/run/lxc/skiplock-$vmid";
+
+    if ($skiplock) {
+        open(my $fh, '>', $skiplock_flag_fn) || die "failed to open $skiplock_flag_fn for writing: $!\n";
+        close($fh);
+    }
+
+    # From here on every failure has to clean up the skiplock flag, including
+    # failures of the volume activation and of the pre-start hookscript.
+    eval {
+        my $storage_cfg = PVE::Storage::config();
+        my $vollist = PVE::LXC::Config->get_vm_volumes($conf);
+
+        PVE::Storage::activate_volumes($storage_cfg, $vollist);
+
+        # a missing monitor socket is not fatal, we just cannot watch the start
+        my $monitor_socket = eval { PVE::LXC::Monitor::get_monitor_socket() };
+        warn $@ if $@;
+
+        unlink "/run/pve/ct-$vmid.stderr"; # systemd does not truncate log files
+
+        my $is_debug = $debug || (!defined($debug) && $conf->{debug});
+        my $base_unit = $is_debug ? 'pve-container-debug' : 'pve-container';
+
+        my $cmd = ['systemctl', 'start', "$base_unit\@$vmid"];
+
+        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
+
+        run_command($cmd);
+
+        monitor_start($monitor_socket, $vmid) if defined($monitor_socket);
+
+        # if debug is requested, also print the log when the start succeeded
+        print_ct_stderr_log($vmid) if $is_debug;
+
+        print_ct_warn_log($vmid); # always print warn log, if any
+    };
+    if (my $err = $@) {
+        unlink $skiplock_flag_fn;
+        die $err;
+    }
+    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
+
+    return;
+}
+
+# Helper to stop a container completely and make sure it has stopped completely.
+# This is necessary because we want the post-stop hook to have completed its
+# unmount-all step, but post-stop happens after lxc puts the container into the
+# STOPPED state.
+# $kill - if true it will always do an immediate hard-stop
+# $shutdown_timeout - the timeout to wait for a graceful shutdown
+# $kill_after_timeout - if true, send a hardstop if shutdown timed out
+#
+# Returns (without a value) if the container is not running or once its command
+# socket got closed. Dies if the command socket cannot be opened for another
+# reason than ECONNREFUSED, or if the container did not stop.
+sub vm_stop {
+    my ($vmid, $kill, $shutdown_timeout, $kill_after_timeout) = @_;
+
+    # Open the container's command socket.
+    my $path = "\0/var/lib/lxc/$vmid/command";
+    my $sock = IO::Socket::UNIX->new(
+        Type => SOCK_STREAM(),
+        Peer => $path,
+    );
+    if (!$sock) {
+        return if $! == ECONNREFUSED; # The container is not running
+        die "failed to open container ${vmid}'s command socket: $!\n";
+    }
+
+    my $conf = PVE::LXC::Config->load_config($vmid);
+    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
+
+    # Stop the container:
+
+    my $cmd = ['lxc-stop', '-n', $vmid];
+
+    if ($kill) {
+        push @$cmd, '--kill'; # doesn't allow timeouts
+    } else {
+        # lxc-stop uses a default timeout
+        push @$cmd, '--nokill' if !$kill_after_timeout;
+
+        if (defined($shutdown_timeout)) {
+            push @$cmd, '--timeout', $shutdown_timeout;
+            # Give run_command 5 extra seconds
+            $shutdown_timeout += 5;
+        }
+    }
+
+    eval { run_command($cmd, timeout => $shutdown_timeout) };
+
+    # Wait until the command socket is closed.
+    # In case the lxc-stop call failed, reading from the command socket may block forever,
+    # so poll with another timeout to avoid freezing the shutdown task.
+    if (my $err = $@) {
+        warn $err;
+
+        # IO::Poll waits indefinitely when the timeout is undefined, which
+        # would defeat the purpose of this fallback, so always use a bounded
+        # wait. 60 seconds is a fallback value chosen for the case that the
+        # caller did not request a timeout.
+        my $poll_timeout = $shutdown_timeout // 60;
+
+        my $poll = IO::Poll->new();
+        $poll->mask($sock => POLLIN | POLLHUP); # watch for input and EOF events
+        $poll->poll($poll_timeout); # IO::Poll timeout is in seconds
+        return if ($poll->events($sock) & POLLHUP);
+    } else {
+        my $result = <$sock>;
+        return if !defined $result; # monitor is gone and the ct has stopped.
+    }
+
+    die "container did not stop\n";
+}
+
+# Reboot container $vmid while holding its config lock: stop it (hard-stopping
+# once $timeout is exceeded) and start it again with a freshly loaded config.
+# Does nothing if the container is not running.
+#
+# NOTE: $skiplock is accepted but not used by this sub.
+sub vm_reboot {
+    my ($vmid, $timeout, $skiplock) = @_;
+
+    my $restart = sub {
+        return if !check_running($vmid);
+
+        # graceful shutdown first, kill if timeout exceeds
+        vm_stop($vmid, 0, $timeout, 1);
+
+        my $fresh_conf = PVE::LXC::Config->load_config($vmid);
+        vm_start($vmid, $fresh_conf);
+    };
+
+    PVE::LXC::Config->lock_config($vmid, $restart);
+}
+
+# Run $code in a forked child that first unshares its mount namespace and
+# marks '/' as recursive slave. Returns whatever PVE::Tools::run_fork returns
+# for the child; the child dies if unsharing fails.
+sub run_unshared {
+    my ($code) = @_;
+
+    my $in_private_ns = sub {
+        # Unshare the mount namespace
+        PVE::Tools::unshare(PVE::Tools::CLONE_NEWNS)
+            or die "failed to unshare mount namespace: $!\n";
+
+        run_command(['mount', '--make-rslave', '/']);
+
+        return $code->();
+    };
+
+    return PVE::Tools::run_fork($in_private_ns);
+}
+
+# Mount the source volume (read-only, optionally a snapshot) on $src and the
+# target volume on $dest, then rsync the contents over.
+#
+# Whatever got mounted is unmounted again in reverse order and both directories
+# are removed (best effort) before an error from the mount/copy phase is
+# re-thrown. An undefined $bwlimit is passed to rsync as 0.
+my $copy_volume = sub {
+    my ($src_volid, $src, $dst_volid, $dest, $storage_cfg, $snapname, $bwlimit, $root_uid, $root_gid) = @_;
+
+    my $source_mp = { volume => $src_volid, mp => '/', ro => 1 };
+    $source_mp->{type} = PVE::LXC::Config->classify_mountpoint($src_volid);
+
+    my $target_mp = { volume => $dst_volid, mp => '/', ro => 0 };
+    $target_mp->{type} = PVE::LXC::Config->classify_mountpoint($dst_volid);
+
+    # directories that were mounted successfully, in mount order
+    my @active_mounts;
+
+    eval {
+        mkdir $src;
+        mountpoint_mount($source_mp, $src, $storage_cfg, $snapname, $root_uid, $root_gid);
+        push @active_mounts, $src;
+
+        mkdir $dest;
+        mountpoint_mount($target_mp, $dest, $storage_cfg, undef, $root_uid, $root_gid);
+        push @active_mounts, $dest;
+
+        $bwlimit //= 0;
+
+        my @rsync = (
+            'rsync',
+            qw(--stats -X -A --numeric-ids -aH --whole-file --sparse --one-file-system),
+            "--bwlimit=$bwlimit",
+            "$src/",
+            $dest,
+        );
+        run_command(\@rsync);
+    };
+    my $err = $@;
+
+    # Wait for rsync's children to release dest so that
+    # consequent file operations (umount, remove) are possible
+    while ((system {"fuser"} "fuser", "-s", $dest) == 0) {
+        sleep 1;
+    }
+
+    # unmount in reverse mount order
+    while (defined(my $dir = pop @active_mounts)) {
+        eval { run_command(['/bin/umount', $dir], errfunc => sub{}) };
+        warn "Can't umount $dir\n" if $@;
+    }
+
+    # If this fails they're used as mount points in a concurrent operation
+    # (which should not happen but there's also no real need to get rid of them).
+    rmdir $dest;
+    rmdir $src;
+
+    die $err if $err;
+};
+
+# Should not be called after unsharing the mount namespace!
+#
+# Copy the volume of mount point $mp to a newly allocated volume on $storage
+# and return the new volume ID. Only mount points of type 'volume' are
+# supported. $mp->{size} is refreshed from the storage layer as a side effect.
+# If anything fails after the allocation, the new volume is freed again before
+# the error is re-thrown.
+sub copy_volume {
+    my ($mp, $vmid, $storage, $storage_cfg, $conf, $snapname, $bwlimit) = @_;
+
+    die "cannot copy volumes of type $mp->{type}\n" if $mp->{type} ne 'volume';
+
+    my $ct_dir = "/var/lib/lxc/$vmid";
+    File::Path::make_path($ct_dir);
+    my $dest = "$ct_dir/.copy-volume-1";
+    my $src = "$ct_dir/.copy-volume-2";
+
+    # get id's for unprivileged container
+    my (undef, $root_uid, $root_gid) = parse_id_maps($conf);
+
+    # Allocate the disk before unsharing in order to make sure zfs subvolumes
+    # are visible in this namespace, otherwise the host only sees the empty
+    # (not-mounted) directory.
+    my $new_volid;
+    eval {
+        # Make sure $mp contains a correct size.
+        $mp->{size} = PVE::Storage::volume_size_info($storage_cfg, $mp->{volume});
+
+        my $needs_chown;
+        ($new_volid, $needs_chown) = alloc_disk(
+            $storage_cfg, $vmid, $storage, $mp->{size}/1024, $root_uid, $root_gid);
+
+        if ($needs_chown) {
+            PVE::Storage::activate_volumes($storage_cfg, [$new_volid], undef);
+            my $volume_path = PVE::Storage::path($storage_cfg, $new_volid, undef);
+            chown($root_uid, $root_gid, $volume_path);
+        }
+
+        my $do_copy = sub {
+            $copy_volume->($mp->{volume}, $src, $new_volid, $dest, $storage_cfg, $snapname, $bwlimit, $root_uid, $root_gid);
+        };
+        run_unshared($do_copy);
+    };
+    my $err = $@;
+    if ($err) {
+        # do not leave a half-copied volume behind
+        PVE::Storage::vdisk_free($storage_cfg, $new_volid) if defined($new_volid);
+        die $err;
+    }
+
+    return $new_volid;
+}
+
+# Query the installed LXC version via 'lxc-start --version'.
+#
+# Returns the list (major, minor); anything after the minor number is ignored.
+# Dies if no output line could be parsed.
+sub get_lxc_version() {
+    my $version;
+
+    my $parse_line = sub {
+        my ($line) = @_;
+        # We only parse out major & minor version numbers.
+        my @numbers = $line =~ /^(\d+)\.(\d+)(?:\D.*)?$/;
+        $version = [@numbers] if @numbers;
+    };
+    run_command([qw(lxc-start --version)], outfunc => $parse_line);
+
+    die "failed to get lxc version\n" if !defined($version);
+
+    # return as a list:
+    return @{$version};
+}
+
+# Freeze container $vmid. When PVE::CGroup::cgroup_mode() is 2 this goes
+# through PVE::LXC::Command::freeze (called with 30 as second argument),
+# otherwise through the container's PVE::LXC::CGroup object.
+sub freeze($) {
+    my ($vmid) = @_;
+
+    return PVE::LXC::Command::freeze($vmid, 30)
+        if PVE::CGroup::cgroup_mode() == 2;
+
+    return PVE::LXC::CGroup->new($vmid)->freeze_thaw(1);
+}
+
+# Thaw (unfreeze) container $vmid. When PVE::CGroup::cgroup_mode() is 2 this
+# goes through PVE::LXC::Command::unfreeze (called with 30 as second argument),
+# otherwise through the container's PVE::LXC::CGroup object.
+sub thaw($) {
+    my ($vmid) = @_;
+
+    return PVE::LXC::Command::unfreeze($vmid, 30)
+        if PVE::CGroup::cgroup_mode() == 2;
+
+    return PVE::LXC::CGroup->new($vmid)->freeze_thaw(0);
+}
+
+# For every netN entry of type 'veth' in $conf, call
+# PVE::Network::SDN::Vnets::add_next_free_cidr for the interface's bridge,
+# passing the container hostname, the interface MAC address and $vmid.
+# Does nothing when $have_sdn is false.
+sub create_ifaces_ipams_ips {
+    my ($conf, $vmid) = @_;
+
+    return if !$have_sdn;
+
+    my @net_keys = grep { $_ =~ m/^net(\d+)$/ } keys %$conf;
+
+    for my $key (@net_keys) {
+        my $iface = PVE::LXC::Config->parse_lxc_network($conf->{$key});
+        next if $iface->{type} ne 'veth';
+
+        PVE::Network::SDN::Vnets::add_next_free_cidr(
+            $iface->{bridge}, $conf->{hostname}, $iface->{hwaddr}, $vmid, undef, 1);
+    }
+}
+
+# For every netN entry in $conf, call
+# PVE::Network::SDN::Vnets::del_ips_from_mac with the interface's bridge and
+# MAC address and the container hostname. Failures are only warned about so
+# that the remaining interfaces are still processed. Does nothing when
+# $have_sdn is false.
+sub delete_ifaces_ipams_ips {
+    my ($conf, $vmid) = @_;
+
+    return if !$have_sdn;
+
+    my @net_keys = grep { $_ =~ m/^net(\d+)$/ } keys %$conf;
+
+    for my $key (@net_keys) {
+        my $iface = PVE::LXC::Config->parse_lxc_network($conf->{$key});
+
+        eval {
+            PVE::Network::SDN::Vnets::del_ips_from_mac($iface->{bridge}, $iface->{hwaddr}, $conf->{hostname});
+        };
+        warn $@ if $@;
+    }
+}