use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED ENOSYS EEXIST);
use IO::Socket::UNIX;
+use IO::Poll qw(POLLIN POLLHUP);
use PVE::Exception qw(raise_perm_exc);
use PVE::Storage;
$IPV4RE
$IPV6RE
);
-use PVE::RPCEnvironment;
use PVE::CpuSet;
use PVE::Network;
use PVE::AccessControl;
use PVE::ProcFSTools;
+use PVE::RESTEnvironment;
use PVE::Syscall qw(:fsmount);
use PVE::LXC::Config;
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
+our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/;
+
sub config_list {
my $vmlist = PVE::Cluster::get_vmlist();
my $res = {};
# some init scripts expect a linux terminal (turnkey).
$raw .= "lxc.environment = TERM=linux\n";
-
+
my $utsname = $conf->{hostname} || "CT$vmid";
$raw .= "lxc.uts.name = $utsname\n";
$raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
}
- my $shares = $conf->{cpuunits} || 1024;
+ my $shares = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
$raw .= "lxc.cgroup.cpu.shares = $shares\n";
} elsif ($cgv2->{cpu}) {
# See PVE::CGroup
}
if (defined(my $shares = $conf->{cpuunits})) {
- die "cpu weight (shares) must be in range [1, 10000]\n"
- if $shares < 1 || $shares > 10000;
+ $shares = PVE::CGroup::clamp_cpu_shares($shares);
$raw .= "lxc.cgroup2.cpu.weight = $shares\n";
}
}
$raw .= "lxc.net.$ind.veth.pair = veth${vmid}i${ind}\n";
$raw .= "lxc.net.$ind.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
$raw .= "lxc.net.$ind.name = $d->{name}\n" if defined($d->{name});
- $raw .= "lxc.net.$ind.mtu = $d->{mtu}\n" if defined($d->{mtu});
+
+ # Keep container from starting with invalid mtu configuration
+ if (my $mtu = $d->{mtu}) {
+ my $bridge_mtu = PVE::Network::read_bridge_mtu($d->{bridge});
+ die "$k: MTU size '$mtu' is bigger than bridge MTU '$bridge_mtu'\n"
+ if ($mtu > $bridge_mtu);
+
+ $raw .= "lxc.net.$ind.mtu = $mtu\n";
+ }
# Starting with lxc 4.0, we do not patch lxc to execute our up-scripts.
if ($lxc_major >= 4) {
sub destroy_lxc_container {
my ($storage_cfg, $vmid, $conf, $replacement_conf, $purge_unreferenced) = @_;
- PVE::LXC::Config->foreach_volume_full($conf, {include_unused => 1}, sub {
+ my $volids = {};
+ my $remove_volume = sub {
my ($ms, $mountpoint) = @_;
- delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
- });
+
+ my $volume = $mountpoint->{volume};
+
+ return if $volids->{$volume};
+ $volids->{$volume} = 1;
+
+ delete_mountpoint_volume($storage_cfg, $vmid, $volume);
+ };
+ PVE::LXC::Config->foreach_volume_full($conf, {include_unused => 1}, $remove_volume);
+
+ PVE::LXC::Config->foreach_volume_full($conf->{pending}, {include_unused => 1}, $remove_volume);
if ($purge_unreferenced) { # also remove unreferenced disk
my $vmdisks = PVE::Storage::vdisk_list($storage_cfg, undef, $vmid, undef, 'rootdir');
warn $@ if $@; # avoid errors - just warn
}
+# Plug the host-side interface $iface into the bridge described by $net.
+#
+# $net is a hash reference; its 'bridge', 'tag', 'firewall', 'trunks', 'rate' and 'hwaddr'
+# entries are read and forwarded to the plug helpers. When SDN support is available the SDN
+# zone helper does the plugging and the MAC address is registered in the bridge FDB through a
+# separate call; otherwise PVE::Network::tap_plug gets the MAC address via its options hash.
+#
+# Returns whatever the last helper that was called returns.
+sub net_tap_plug : prototype($$) {
+    my ($iface, $net) = @_;
+
+    my ($bridge, $vlan_tag, $firewall, $trunks, $rate, $mac) =
+        @{$net}{qw(bridge tag firewall trunks rate hwaddr)};
+
+    # plain (non-SDN) setup: a single call takes care of plugging and of the MAC address
+    return PVE::Network::tap_plug(
+        $iface, $bridge, $vlan_tag, $firewall, $trunks, $rate, { mac => $mac },
+    ) if !$have_sdn;
+
+    PVE::Network::SDN::Zones::tap_plug($iface, $bridge, $vlan_tag, $firewall, $trunks, $rate);
+
+    return PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $firewall);
+}
+
sub update_net {
my ($vmid, $conf, $opt, $newnet, $netid, $rootdir) = @_;
} else {
if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
safe_num_ne($oldnet->{tag}, $newnet->{tag}) ||
- safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) {
+ safe_num_ne($oldnet->{firewall}, $newnet->{firewall})
+ ) {
if ($oldnet->{bridge}) {
PVE::Network::tap_unplug($veth);
PVE::LXC::Config->write_config($vmid, $conf);
}
- if ($have_sdn) {
- PVE::Network::SDN::Zones::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
- } else {
- PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
- }
+ PVE::LXC::net_tap_plug($veth, $newnet);
# This includes the rate:
foreach (qw(bridge tag firewall rate)) {
if ($have_sdn) {
PVE::Network::SDN::Zones::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
- PVE::Network::SDN::Zones::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
} else {
PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
- PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
}
+ PVE::LXC::net_tap_plug($veth, $newnet);
+
# attach peer in container
my $cmd = ['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ];
PVE::Tools::run_command($cmd);
}
sub check_ct_modify_config_perm {
- my ($rpcenv, $authuser, $vmid, $pool, $newconf, $delete) = @_;
+ my ($rpcenv, $authuser, $vmid, $pool, $oldconf, $newconf, $delete, $unprivileged) = @_;
return 1 if $authuser eq 'root@pam';
+ my $storage_cfg = PVE::Storage::config();
my $check = sub {
my ($opt, $delete) = @_;
my $data = PVE::LXC::Config->parse_volume($opt, $newconf->{$opt});
raise_perm_exc("mount point type $data->{type} is only allowed for root\@pam")
if $data->{type} ne 'volume';
+ my $volid = $data->{volume};
+ if ($volid =~ $NEW_DISK_RE) {
+ my $sid = $1;
+ $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);
+ } else {
+ PVE::Storage::check_volume_access(
+ $rpcenv,
+ $authuser,
+ $storage_cfg,
+ $vmid,
+ $volid,
+ 'rootdir',
+ );
+ }
} elsif ($opt eq 'memory' || $opt eq 'swap') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
} elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
$opt eq 'searchdomain' || $opt eq 'hostname') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
} elsif ($opt eq 'features') {
- # For now this is restricted to root@pam
- raise_perm_exc("changing feature flags is only allowed for root\@pam");
+ raise_perm_exc("changing feature flags for privileged container is only allowed for root\@pam")
+ if !$unprivileged;
+
+ my $nesting_changed = 0;
+ my $other_changed = 0;
+ if (!$delete) {
+ my $features = PVE::LXC::Config->parse_features($newconf->{$opt});
+ if (defined($oldconf) && $oldconf->{$opt}) {
+ # existing container with features
+ my $old_features = PVE::LXC::Config->parse_features($oldconf->{$opt});
+ for my $feature ((keys %$old_features, keys %$features)) {
+ my $old = $old_features->{$feature} // '';
+ my $new = $features->{$feature} // '';
+ if ($old ne $new) {
+ if ($feature eq 'nesting') {
+ $nesting_changed = 1;
+ next;
+ } else {
+ $other_changed = 1;
+ last;
+ }
+ }
+ }
+ } else {
+ # new container or no features defined
+ if (scalar(keys %$features) == 1 && $features->{nesting}) {
+ $nesting_changed = 1;
+ } elsif (scalar(keys %$features) > 0) {
+ $other_changed = 1;
+ }
+ }
+ } else {
+ my $features = PVE::LXC::Config->parse_features($oldconf->{$opt});
+ if (scalar(keys %$features) == 1 && $features->{nesting}) {
+ $nesting_changed = 1;
+ } elsif (scalar(keys %$features) > 0) {
+ $other_changed = 1;
+ }
+ }
+ raise_perm_exc("changing feature flags (except nesting) is only allowed for root\@pam")
+ if $other_changed;
+ $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Allocate'])
+ if $nesting_changed;
} elsif ($opt eq 'hookscript') {
# For now this is restricted to root@pam
raise_perm_exc("changing the hookscript is only allowed for root\@pam");
+ } elsif ($opt eq 'tags') {
+ my $old = $oldconf->{$opt};
+ my $new = $delete ? '' : $newconf->{$opt};
+ PVE::GuestHelpers::assert_tag_permissions($vmid, $old, $new, $rpcenv, $authuser);
} else {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
}
my $type = $mountpoint->{type};
my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
my $mounted_dev;
-
+
return if !$volid || !$mount;
$mount =~ s!/+!/!g;
my $mount_path;
my ($mpfd, $parentfd, $last_dir);
-
+
if (defined($rootdir)) {
($rootdir, $mount_path, $mpfd, $parentfd, $last_dir) =
__mount_prepare_rootdir($rootdir, $mount, $rootuid, $rootgid);
if (defined($stage_mount)) {
$mount_path = $rootdir;
}
-
+
my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);
die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);
warn "cannot enable quota control for bind mounts\n" if $quota;
return wantarray ? ($volid, 0, undef) : $volid;
}
-
+
die "unsupported storage";
}
-sub mountpoint_hotplug($$$) {
+sub mountpoint_hotplug :prototype($$$$$) {
my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
return ($volid, $needs_chown);
}
-our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/;
sub create_disks {
my ($storecfg, $vmid, $settings, $conf, $pending) = @_;
my $log = eval { file_get_contents($log_fn) };
return if !$log;
- my $rpcenv = eval { PVE::RPCEnvironment::get() };
-
- my $warn_fn = $rpcenv ? sub { $rpcenv->warn($_[0]) } : sub { print STDERR "WARN: $_[0]\n" };
-
while ($log =~ /^\h*\s*(.*?)\h*$/gm) {
- my $line = $1;
- $warn_fn->($line);
+ PVE::RESTEnvironment::log_warn($1);
}
unlink $log_fn or warn "could not unlink '$log_fn' - $!\n";
}
}
eval { run_command($cmd, timeout => $shutdown_timeout) };
+
+ # Wait until the command socket is closed.
+ # In case the lxc-stop call failed, reading from the command socket may block forever,
+ # so poll with another timeout to avoid freezing the shutdown task.
if (my $err = $@) {
- warn $@ if $@;
- }
+ warn $err if $err;
- my $result = <$sock>;
+ my $poll = IO::Poll->new();
+ $poll->mask($sock => POLLIN | POLLHUP); # watch for input and EOF events
+ $poll->poll($shutdown_timeout); # IO::Poll timeout is in seconds
+ return if ($poll->events($sock) & POLLHUP);
+ } else {
+ my $result = <$sock>;
+ return if !defined $result; # monitor is gone and the ct has stopped.
+ }
- return if !defined $result; # monitor is gone and the ct has stopped.
die "container did not stop\n";
}