use PVE::Network;
use PVE::AccessControl;
use PVE::ProcFSTools;
+use PVE::RESTEnvironment;
use PVE::Syscall qw(:fsmount);
use PVE::LXC::Config;
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
+our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/;
+
sub config_list {
my $vmlist = PVE::Cluster::get_vmlist();
my $res = {};
my $rules = {
keyctl => ['errno 38'],
+
+ # Disable btrfs ioctls since they don't work particularly well in user namespaces.
+ # In particular, without the mount option that lets rmdir remove snapshots, user
+ # namespaces can create snapshots but can neither `show` nor `delete` them, which is quite
+ # horrible, so for now, just disable this entirely:
+ #
+ # BTRFS_IOCTL_MAGIC 0x94, _IOC type shift is 8,
+ # so `(req & 0xFF00) == 0x9400` is a btrfs ioctl and gets an EPERM
+ ioctl => ['errno 1 [1,0x9400,SCMP_CMP_MASKED_EQ,0xff00]'],
};
my $raw_conf = '';
my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
+ File::Path::mkpath($dir);
+
my $cfgpath = '/usr/share/lxc/config';
my $inc = "$cfgpath/$ostype.common.conf";
$inc ="$cfgpath/common.conf" if !-f $inc;
$raw .= "lxc.cgroup.cpu.cfs_quota_us = $value\n";
}
- my $shares = $conf->{cpuunits} || 1024;
+ my $shares = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
$raw .= "lxc.cgroup.cpu.shares = $shares\n";
} elsif ($cgv2->{cpu}) {
# See PVE::CGroup
}
if (defined(my $shares = $conf->{cpuunits})) {
- die "cpu weight (shares) must be in range [1, 10000]\n"
- if $shares < 1 || $shares > 10000;
+ $shares = PVE::CGroup::clamp_cpu_shares($shares);
$raw .= "lxc.cgroup2.cpu.weight = $shares\n";
}
}
$raw .= "lxc.net.$ind.veth.pair = veth${vmid}i${ind}\n";
$raw .= "lxc.net.$ind.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
$raw .= "lxc.net.$ind.name = $d->{name}\n" if defined($d->{name});
- $raw .= "lxc.net.$ind.mtu = $d->{mtu}\n" if defined($d->{mtu});
+
+ # Keep container from starting with invalid mtu configuration
+ if (my $mtu = $d->{mtu}) {
+ my $bridge_mtu = PVE::Network::read_bridge_mtu($d->{bridge});
+ die "$k: MTU size '$mtu' is bigger than bridge MTU '$bridge_mtu'\n"
+ if ($mtu > $bridge_mtu);
+
+ $raw .= "lxc.net.$ind.mtu = $mtu\n";
+ }
# Starting with lxc 4.0, we do not patch lxc to execute our up-scripts.
if ($lxc_major >= 4) {
sub destroy_lxc_container {
my ($storage_cfg, $vmid, $conf, $replacement_conf, $purge_unreferenced) = @_;
- PVE::LXC::Config->foreach_volume_full($conf, {include_unused => 1}, sub {
+ my $volids = {};
+ my $remove_volume = sub {
my ($ms, $mountpoint) = @_;
- delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
- });
+
+ my $volume = $mountpoint->{volume};
+
+ return if $volids->{$volume};
+ $volids->{$volume} = 1;
+
+ delete_mountpoint_volume($storage_cfg, $vmid, $volume);
+ };
+ PVE::LXC::Config->foreach_volume_full($conf, {include_unused => 1}, $remove_volume);
+
+ PVE::LXC::Config->foreach_volume_full($conf->{pending}, {include_unused => 1}, $remove_volume);
if ($purge_unreferenced) { # also remove unreferenced disk
my $vmdisks = PVE::Storage::vdisk_list($storage_cfg, undef, $vmid, undef, 'rootdir');
} else {
if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
safe_num_ne($oldnet->{tag}, $newnet->{tag}) ||
- safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) {
+ safe_num_ne($oldnet->{firewall}, $newnet->{firewall})
+ ) {
if ($oldnet->{bridge}) {
PVE::Network::tap_unplug($veth);
PVE::LXC::Config->write_config($vmid, $conf);
}
+ my ($bridge, $mac, $firewall, $rate) = $newnet->@{'bridge', 'hwaddr', 'firewall', 'rate'};
if ($have_sdn) {
- PVE::Network::SDN::Zones::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
+ PVE::Network::SDN::Zones::tap_plug(
+ $veth, $bridge, $newnet->{tag}, $firewall, $newnet->{trunks}, $rate);
+ PVE::Network::SDN::Zones::add_bridge_fdb($veth, $mac, $bridge, $firewall);
} else {
- PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
+ PVE::Network::tap_plug(
+ $veth, $bridge, $newnet->{tag}, $firewall, $newnet->{trunks}, $rate, { mac => $mac });
}
# This includes the rate:
if ($have_sdn) {
PVE::Network::SDN::Zones::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
PVE::Network::SDN::Zones::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
+ PVE::Network::SDN::Zones::add_bridge_fdb($veth, $newnet->{hwaddr}, $newnet->{bridge}, $newnet->{firewall});
} else {
PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
+ PVE::Network::add_bridge_fdb($veth, $newnet->{hwaddr}, $newnet->{firewall}); # early returns if brport has learning on
}
# attach peer in container
}
sub check_ct_modify_config_perm {
- my ($rpcenv, $authuser, $vmid, $pool, $newconf, $delete) = @_;
+ my ($rpcenv, $authuser, $vmid, $pool, $oldconf, $newconf, $delete, $unprivileged) = @_;
return 1 if $authuser eq 'root@pam';
+ my $storage_cfg = PVE::Storage::config();
my $check = sub {
my ($opt, $delete) = @_;
my $data = PVE::LXC::Config->parse_volume($opt, $newconf->{$opt});
raise_perm_exc("mount point type $data->{type} is only allowed for root\@pam")
if $data->{type} ne 'volume';
+ my $volid = $data->{volume};
+ if ($volid =~ $NEW_DISK_RE) {
+ my $sid = $1;
+ $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);
+ } else {
+ PVE::Storage::check_volume_access(
+ $rpcenv,
+ $authuser,
+ $storage_cfg,
+ $vmid,
+ $volid,
+ 'rootdir',
+ );
+ }
} elsif ($opt eq 'memory' || $opt eq 'swap') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
} elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
$opt eq 'searchdomain' || $opt eq 'hostname') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
} elsif ($opt eq 'features') {
- # For now this is restricted to root@pam
- raise_perm_exc("changing feature flags is only allowed for root\@pam");
+ raise_perm_exc("changing feature flags for privileged container is only allowed for root\@pam")
+ if !$unprivileged;
+
+ my $nesting_changed = 0;
+ my $other_changed = 0;
+ if (!$delete) {
+ my $features = PVE::LXC::Config->parse_features($newconf->{$opt});
+ if (defined($oldconf) && $oldconf->{$opt}) {
+ # existing container with features
+ my $old_features = PVE::LXC::Config->parse_features($oldconf->{$opt});
+ for my $feature ((keys %$old_features, keys %$features)) {
+ my $old = $old_features->{$feature} // '';
+ my $new = $features->{$feature} // '';
+ if ($old ne $new) {
+ if ($feature eq 'nesting') {
+ $nesting_changed = 1;
+ next;
+ } else {
+ $other_changed = 1;
+ last;
+ }
+ }
+ }
+ } else {
+ # new container or no features defined
+ if (scalar(keys %$features) == 1 && $features->{nesting}) {
+ $nesting_changed = 1;
+ } elsif (scalar(keys %$features) > 0) {
+ $other_changed = 1;
+ }
+ }
+ } else {
+ my $features = PVE::LXC::Config->parse_features($oldconf->{$opt});
+ if (scalar(keys %$features) == 1 && $features->{nesting}) {
+ $nesting_changed = 1;
+ } elsif (scalar(keys %$features) > 0) {
+ $other_changed = 1;
+ }
+ }
+ raise_perm_exc("changing feature flags (except nesting) is only allowed for root\@pam")
+ if $other_changed;
+ $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Allocate'])
+ if $nesting_changed;
} elsif ($opt eq 'hookscript') {
# For now this is restricted to root@pam
raise_perm_exc("changing the hookscript is only allowed for root\@pam");
+ } elsif ($opt eq 'tags') {
+ my $old = $oldconf->{$opt};
+ my $new = $delete ? '' : $newconf->{$opt};
+ PVE::GuestHelpers::assert_tag_permissions($vmid, $old, $new, $rpcenv, $authuser);
} else {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Options']);
}
die "unsupported storage";
}
-sub mountpoint_hotplug($$$) {
+sub mountpoint_hotplug :prototype($$$$$) {
my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
return ($volid, $needs_chown);
}
-our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/;
sub create_disks {
my ($storecfg, $vmid, $settings, $conf, $pending) = @_;
print STDERR "$line\n";
}
}
+# Forward the warnings stored in /run/pve/ct-$vmid.warnings for container $vmid to
+# PVE::RESTEnvironment::log_warn (one call per regex match), then remove that file.
+# Returns early, without touching the file, when reading it yields nothing.
+my sub print_ct_warn_log {
+ my ($vmid) = @_;
+ my $log_fn = "/run/pve/ct-$vmid.warnings";
+ # any error raised while reading is swallowed by the eval, leaving $log undefined
+ # NOTE(review): file_get_contents is presumably imported from PVE::Tools - confirm
+ my $log = eval { file_get_contents($log_fn) };
+ return if !$log;
+
+ # /m anchors ^ and $ per line; the capture excludes surrounding horizontal whitespace,
+ # and the leading \s* may also consume newlines, which lets blank lines be skipped
+ while ($log =~ /^\h*\s*(.*?)\h*$/gm) {
+ PVE::RESTEnvironment::log_warn($1);
+ }
+ # a failed removal is only reported as a warning, it is not fatal
+ unlink $log_fn or warn "could not unlink '$log_fn' - $!\n";
+}
my sub monitor_state_change($$) {
my ($monitor_socket, $vmid) = @_;
# if debug is requested, also print the log when the start succeeded
print_ct_stderr_log($vmid) if $is_debug;
+
+ print_ct_warn_log($vmid); # always print warn log, if any
};
if (my $err = $@) {
unlink $skiplock_flag_fn;