use File::Spec;
use Cwd qw();
use Fcntl qw(O_RDONLY O_NOFOLLOW O_DIRECTORY);
-use Errno qw(ELOOP EROFS);
+use Errno qw(ELOOP ENOTDIR EROFS);
use PVE::Exception qw(raise_perm_exc);
use PVE::Storage;
use PVE::SafeSyslog;
use PVE::INotify;
use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full O_PATH);
+use PVE::CpuSet;
use PVE::Network;
use PVE::AccessControl;
use PVE::ProcFSTools;
+use PVE::Syscall;
use PVE::LXC::Config;
use Time::HiRes qw (gettimeofday);
-use Data::Dumper;
-
my $nodename = PVE::INotify::nodename();
my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
-our $COMMON_TAR_FLAGS = [ '--sparse', '--numeric-owner', '--acls',
- '--xattrs',
- '--xattrs-include=user.*',
- '--xattrs-include=security.capability',
- '--warning=no-xattr-write' ];
-
sub config_list {
my $vmlist = PVE::Cluster::get_vmlist();
my $res = {};
my $last_proc_vmid_stat;
my $parse_cpuacct_stat = sub {
- my ($vmid) = @_;
+ my ($vmid, $unprivileged) = @_;
- my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);
+ my $raw = read_cgroup_value('cpuacct', $vmid, $unprivileged, 'cpuacct.stat', 1);
my $stat = {};
my $uptime = (PVE::ProcFSTools::read_proc_uptime(1))[0];
+ my $unprivileged = {};
+
foreach my $vmid (keys %$list) {
my $d = $list->{$vmid};
my $cfspath = PVE::LXC::Config->cfs_config_path($vmid);
my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};
+ $unprivileged->{$vmid} = $conf->{unprivileged};
+
$d->{name} = $conf->{'hostname'} || "CT$vmid";
$d->{name} =~ s/[\s]//g;
- $d->{cpus} = $conf->{cpulimit} || $cpucount;
+ $d->{cpus} = $conf->{cores} || $conf->{cpulimit};
+ $d->{cpus} = $cpucount if !$d->{cpus};
$d->{lock} = $conf->{lock} || '';
my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
$d->{uptime} = time - $ctime; # the method lxcfs uses
- $d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
- $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};
+ my $unpriv = $unprivileged->{$vmid};
+
+ my $memory_stat = read_cgroup_list('memory', $vmid, $unpriv, 'memory.stat');
+ my $mem_usage_in_bytes = read_cgroup_value('memory', $vmid, $unpriv, 'memory.usage_in_bytes');
- my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1);
+ $d->{mem} = $mem_usage_in_bytes - $memory_stat->{total_cache};
+ $d->{swap} = read_cgroup_value('memory', $vmid, $unpriv, 'memory.memsw.usage_in_bytes') - $mem_usage_in_bytes;
+
+ my $blkio_bytes = read_cgroup_value('blkio', $vmid, $unpriv, 'blkio.throttle.io_service_bytes', 1);
my @bytes = split(/\n/, $blkio_bytes);
foreach my $byte (@bytes) {
if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) {
- $d->{diskread} = $2 if $key eq 'Read';
- $d->{diskwrite} = $2 if $key eq 'Write';
+ $d->{diskread} += $2 if $key eq 'Read';
+ $d->{diskwrite} += $2 if $key eq 'Write';
}
}
- my $pstat = &$parse_cpuacct_stat($vmid);
+ my $pstat = $parse_cpuacct_stat->($vmid, $unpriv);
my $used = $pstat->{utime} + $pstat->{stime};
return $list;
}
-sub read_cgroup_value {
- my ($group, $vmid, $name, $full) = @_;
+# Read a cgroup file made of whitespace-separated "key value" pairs and
+# return its content as a hash reference.
+#
+# Parameters:
+#   $group        - cgroup controller name, used as a path component
+#   $vmid         - container ID, used as a path component
+#   $unprivileged - passed through to read_cgroup_value(), which uses it to
+#                   pick the sub-directory the file is read from
+#   $name         - name of the file inside the container's cgroup directory
+#
+# Returns: hash reference mapping each key found in the file to its value.
+#
+# The file is fetched through read_cgroup_value() with $full set, i.e. the
+# whole content is read rather than a single value.
+sub read_cgroup_list($$$$) {
+    my ($group, $vmid, $unprivileged, $name) = @_;
+
+    my $content = read_cgroup_value($group, $vmid, $unprivileged, $name, 1);
+
+    # split(' ', ...) skips leading whitespace. split(/\s+/, ...) would emit an
+    # empty leading field in that case and shift every key/value pair by one.
+    return { split(' ', $content) };
+}
+
+sub read_cgroup_value($$$$$) {
+ my ($group, $vmid, $unprivileged, $name, $full) = @_;
- my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name";
+ my $nsdir = $unprivileged ? '' : 'ns/';
+ my $path = "/sys/fs/cgroup/$group/lxc/$vmid/${nsdir}$name";
return PVE::Tools::file_get_contents($path) if $full;
$raw .= "lxc.arch = $conf->{arch}\n";
my $unprivileged = $conf->{unprivileged};
- my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};
+ my $custom_idmap = grep { $_->[0] eq 'lxc.idmap' } @{$conf->{lxc}};
my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
- if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine | gentoo | unmanaged)$/x) {
- my $inc ="/usr/share/lxc/config/$ostype.common.conf";
- $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
- $raw .= "lxc.include = $inc\n";
- if ($unprivileged || $custom_idmap) {
- $inc = "/usr/share/lxc/config/$ostype.userns.conf";
- $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
- $raw .= "lxc.include = $inc\n"
- }
- } else {
- die "implement me (ostype $ostype)";
+
+ my $inc ="/usr/share/lxc/config/$ostype.common.conf";
+ $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
+ $raw .= "lxc.include = $inc\n";
+ if ($unprivileged || $custom_idmap) {
+ $inc = "/usr/share/lxc/config/$ostype.userns.conf";
+ $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
+ $raw .= "lxc.include = $inc\n"
}
# WARNING: DO NOT REMOVE this without making sure that loop device nodes
# Should we read them from /etc/subuid?
if ($unprivileged && !$custom_idmap) {
- $raw .= "lxc.id_map = u 0 100000 65536\n";
- $raw .= "lxc.id_map = g 0 100000 65536\n";
+ $raw .= "lxc.idmap = u 0 100000 65536\n";
+ $raw .= "lxc.idmap = g 0 100000 65536\n";
}
if (!PVE::LXC::Config->has_dev_console($conf)) {
- $raw .= "lxc.console = none\n";
+ $raw .= "lxc.console.path = none\n";
$raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
}
my $ttycount = PVE::LXC::Config->get_tty_count($conf);
- $raw .= "lxc.tty = $ttycount\n";
+ $raw .= "lxc.tty.max = $ttycount\n";
# some init scripts expect a linux terminal (turnkey).
$raw .= "lxc.environment = TERM=linux\n";
my $utsname = $conf->{hostname} || "CT$vmid";
- $raw .= "lxc.utsname = $utsname\n";
+ $raw .= "lxc.uts.name = $utsname\n";
my $memory = $conf->{memory} || 512;
my $swap = $conf->{swap} // 0;
my $mountpoint = PVE::LXC::Config->parse_ct_rootfs($conf->{rootfs});
- $raw .= "lxc.rootfs = $dir/rootfs\n";
+ $raw .= "lxc.rootfs.path = $dir/rootfs\n";
- my $netcount = 0;
- foreach my $k (keys %$conf) {
+ foreach my $k (sort keys %$conf) {
next if $k !~ m/^net(\d+)$/;
my $ind = $1;
my $d = PVE::LXC::Config->parse_lxc_network($conf->{$k});
- $netcount++;
- $raw .= "lxc.network.type = veth\n";
- $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n";
- $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
- $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name});
- $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu});
+ $raw .= "lxc.net.$ind.type = veth\n";
+ $raw .= "lxc.net.$ind.veth.pair = veth${vmid}i${ind}\n";
+ $raw .= "lxc.net.$ind.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr});
+ $raw .= "lxc.net.$ind.name = $d->{name}\n" if defined($d->{name});
+ $raw .= "lxc.net.$ind.mtu = $d->{mtu}\n" if defined($d->{mtu});
}
+ my $had_cpuset = 0;
if (my $lxcconf = $conf->{lxc}) {
foreach my $entry (@$lxcconf) {
my ($k, $v) = @$entry;
- $netcount++ if $k eq 'lxc.network.type';
+ $had_cpuset = 1 if $k eq 'lxc.cgroup.cpuset.cpus';
$raw .= "$k = $v\n";
}
}
- $raw .= "lxc.network.type = empty\n" if !$netcount;
+ my $cores = $conf->{cores};
+ if (!$had_cpuset && $cores) {
+ my $cpuset = eval { PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus') };
+ $cpuset = PVE::CpuSet->new_from_cgroup('', 'effective_cpus') if !$cpuset;
+ my @members = $cpuset->members();
+ while (scalar(@members) > $cores) {
+ my $randidx = int(rand(scalar(@members)));
+ $cpuset->delete($members[$randidx]);
+ splice(@members, $randidx, 1); # keep track of the changes
+ }
+ $raw .= "lxc.cgroup.cpuset.cpus = ".$cpuset->short_string()."\n";
+ }
File::Path::mkpath("$dir/rootfs");
my $check = sub {
my ($opt, $delete) = @_;
- if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
+ if ($opt eq 'cores' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
} elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
return if $delete;
my $data = $opt eq 'rootfs' ? PVE::LXC::Config->parse_ct_rootfs($newconf->{$opt})
: PVE::LXC::Config->parse_ct_mountpoint($newconf->{$opt});
- raise_perm_exc("mountpoint type $data->{type}") if $data->{type} ne 'volume';
+ raise_perm_exc("mount point type $data->{type} is only allowed for root\@pam")
+ if $data->{type} ne 'volume';
} elsif ($opt eq 'memory' || $opt eq 'swap') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
} elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
if (!$next) {
# failed, check for symlinks and try to create the path
- die "symlink encountered at: $dir\n" if $! == ELOOP;
+ die "symlink encountered at: $dir\n" if $! == ELOOP || $! == ENOTDIR;
die "cannot open directory $dir: $!\n" if !$mkdir;
# We don't check for errors on mkdirat() here and just try to
die "failed to open mount point: $!\n" if !$destdh;
if ($ro) {
my $dot = '.';
- # 269: faccessat()
# no separate function because 99% of the time it's the wrong thing to use.
- if (syscall(269, fileno($destdh), $dot, &POSIX::W_OK, 0) != -1) {
+ if (syscall(PVE::Syscall::faccessat, fileno($destdh), $dot, &POSIX::W_OK, 0) != -1) {
die "failed to mark bind mount read only\n";
}
die "read-only check failed: $!\n" if $! != EROFS;
}
}
+our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/; # "<storeid>:<size>" spec; create_disks reads $1 as the storage ID and $2 as the (possibly fractional) size in GB
sub create_disks {
my ($storecfg, $vmid, $settings, $conf) = @_;
my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);
- if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
+ if ($storage && ($volid =~ $NEW_DISK_RE)) {
my ($storeid, $size_gb) = ($1, $2);
my $size_kb = int(${size_gb}*1024) * 1024;
my $lxc = $conf->{lxc};
foreach my $entry (@$lxc) {
my ($key, $value) = @$entry;
- next if $key ne 'lxc.id_map';
+ # FIXME: remove the 'id_map' variant when lxc-3.0 arrives
+ next if $key ne 'lxc.idmap' && $key ne 'lxc.id_map';
if ($value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/) {
my ($type, $ct, $host, $length) = ($1, $2, $3, $4);
push @$id_map, [$type, $ct, $host, $length];
$rootgid = $host if $type eq 'g';
}
} else {
- die "failed to parse id_map: $value\n";
+ die "failed to parse idmap: $value\n";
}
}