X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=src%2FPVE%2FLXC.pm;h=ad483924d7bd2c7c5b107cd8194c909210fd4612;hb=3b5070d437c1b637daa99aaac6cf507999fa8241;hp=bb577faa58f1db2c1127d7111d0b0dd3fc21c0c0;hpb=f2357408bbd03c4b0114338a9eb49238e2dd1ed6;p=pve-container.git diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm index bb577fa..ad48392 100644 --- a/src/PVE/LXC.pm +++ b/src/PVE/LXC.pm @@ -11,31 +11,25 @@ use File::Path; use File::Spec; use Cwd qw(); use Fcntl qw(O_RDONLY O_NOFOLLOW O_DIRECTORY); -use Errno qw(ELOOP EROFS); +use Errno qw(ELOOP ENOTDIR EROFS); use PVE::Exception qw(raise_perm_exc); use PVE::Storage; use PVE::SafeSyslog; use PVE::INotify; use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full O_PATH); +use PVE::CpuSet; use PVE::Network; use PVE::AccessControl; use PVE::ProcFSTools; +use PVE::Syscall; use PVE::LXC::Config; use Time::HiRes qw (gettimeofday); -use Data::Dumper; - my $nodename = PVE::INotify::nodename(); my $cpuinfo= PVE::ProcFSTools::read_cpuinfo(); -our $COMMON_TAR_FLAGS = [ '--sparse', '--numeric-owner', '--acls', - '--xattrs', - '--xattrs-include=user.*', - '--xattrs-include=security.capability', - '--warning=no-xattr-write' ]; - sub config_list { my $vmlist = PVE::Cluster::get_vmlist(); my $res = {}; @@ -104,9 +98,9 @@ sub get_container_disk_usage { my $last_proc_vmid_stat; my $parse_cpuacct_stat = sub { - my ($vmid) = @_; + my ($vmid, $unprivileged) = @_; - my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1); + my $raw = read_cgroup_value('cpuacct', $vmid, $unprivileged, 'cpuacct.stat', 1); my $stat = {}; @@ -133,6 +127,8 @@ sub vmstatus { my $uptime = (PVE::ProcFSTools::read_proc_uptime(1))[0]; + my $unprivileged = {}; + foreach my $vmid (keys %$list) { my $d = $list->{$vmid}; @@ -144,6 +140,8 @@ sub vmstatus { my $cfspath = PVE::LXC::Config->cfs_config_path($vmid); my $conf = PVE::Cluster::cfs_read_file($cfspath) || {}; + $unprivileged->{$vmid} = $conf->{unprivileged}; + $d->{name} = $conf->{'hostname'} || "CT$vmid"; $d->{name} =~ s/[\s]//g; @@ -193,22 +191,24 @@ sub vmstatus { my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime $d->{uptime} = time - $ctime; # the method lxcfs uses - my $memory_stat = read_cgroup_list('memory', $vmid, 'memory.stat'); - my $mem_usage_in_bytes = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes'); + my $unpriv = $unprivileged->{$vmid}; + + my $memory_stat = read_cgroup_list('memory', $vmid, $unpriv, 'memory.stat'); + my $mem_usage_in_bytes = read_cgroup_value('memory', $vmid, $unpriv, 'memory.usage_in_bytes'); - $d->{mem} = $mem_usage_in_bytes - $memory_stat->{cache}; - $d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $mem_usage_in_bytes; + $d->{mem} = $mem_usage_in_bytes - $memory_stat->{total_cache}; + $d->{swap} = read_cgroup_value('memory', $vmid, $unpriv, 'memory.memsw.usage_in_bytes') - $mem_usage_in_bytes; - my $blkio_bytes = read_cgroup_value('blkio', $vmid, 'blkio.throttle.io_service_bytes', 1); + my $blkio_bytes = read_cgroup_value('blkio', $vmid, $unpriv, 'blkio.throttle.io_service_bytes', 1); my @bytes = split(/\n/, $blkio_bytes); foreach my $byte (@bytes) { if (my ($key, $value) = $byte =~ /(Read|Write)\s+(\d+)/) { - $d->{diskread} = $2 if $key eq 'Read'; - $d->{diskwrite} = $2 if $key eq 'Write'; + $d->{diskread} += $2 if $key eq 'Read'; + $d->{diskwrite} += $2 if $key eq 'Write'; } } - my $pstat = &$parse_cpuacct_stat($vmid); + my $pstat = $parse_cpuacct_stat->($vmid, $unpriv); my $used = $pstat->{utime} + $pstat->{stime}; @@ -255,18 +255,19 @@ sub vmstatus { return $list; } -sub read_cgroup_list { - my ($group, $vmid, $name) = @_; +sub read_cgroup_list($$$$) { + my ($group, $vmid, $unprivileged, $name) = @_; - my $content = read_cgroup_value($group, $vmid, $name, 1); + my $content = read_cgroup_value($group, $vmid, $unprivileged, $name, 1); return { split(/\s+/, $content) }; } -sub read_cgroup_value { - my ($group, $vmid, $name, $full) = @_; +sub read_cgroup_value($$$$$) { + my ($group, $vmid, $unprivileged, $name, $full) = @_; - my $path = "/sys/fs/cgroup/$group/lxc/$vmid/$name"; + my $nsdir = $unprivileged ? '' : 'ns/'; + my $path = "/sys/fs/cgroup/$group/lxc/$vmid/${nsdir}$name"; return PVE::Tools::file_get_contents($path) if $full; @@ -355,20 +356,17 @@ sub update_lxc_config { $raw .= "lxc.arch = $conf->{arch}\n"; my $unprivileged = $conf->{unprivileged}; - my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}}; + my $custom_idmap = grep { $_->[0] eq 'lxc.idmap' } @{$conf->{lxc}}; my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error"; - if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine | gentoo | unmanaged)$/x) { - my $inc ="/usr/share/lxc/config/$ostype.common.conf"; - $inc ="/usr/share/lxc/config/common.conf" if !-f $inc; - $raw .= "lxc.include = $inc\n"; - if ($unprivileged || $custom_idmap) { - $inc = "/usr/share/lxc/config/$ostype.userns.conf"; - $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc; - $raw .= "lxc.include = $inc\n" - } - } else { - die "implement me (ostype $ostype)"; + + my $inc ="/usr/share/lxc/config/$ostype.common.conf"; + $inc ="/usr/share/lxc/config/common.conf" if !-f $inc; + $raw .= "lxc.include = $inc\n"; + if ($unprivileged || $custom_idmap) { + $inc = "/usr/share/lxc/config/$ostype.userns.conf"; + $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc; + $raw .= "lxc.include = $inc\n" } # WARNING: DO NOT REMOVE this without making sure that loop device nodes @@ -380,23 +378,23 @@ sub update_lxc_config { # Should we read them from /etc/subuid? if ($unprivileged && !$custom_idmap) { - $raw .= "lxc.id_map = u 0 100000 65536\n"; - $raw .= "lxc.id_map = g 0 100000 65536\n"; + $raw .= "lxc.idmap = u 0 100000 65536\n"; + $raw .= "lxc.idmap = g 0 100000 65536\n"; } if (!PVE::LXC::Config->has_dev_console($conf)) { - $raw .= "lxc.console = none\n"; + $raw .= "lxc.console.path = none\n"; $raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n"; } my $ttycount = PVE::LXC::Config->get_tty_count($conf); - $raw .= "lxc.tty = $ttycount\n"; + $raw .= "lxc.tty.max = $ttycount\n"; # some init scripts expect a linux terminal (turnkey). $raw .= "lxc.environment = TERM=linux\n"; my $utsname = $conf->{hostname} || "CT$vmid"; - $raw .= "lxc.utsname = $utsname\n"; + $raw .= "lxc.uts.name = $utsname\n"; my $memory = $conf->{memory} || 512; my $swap = $conf->{swap} // 0; @@ -421,30 +419,40 @@ sub update_lxc_config { my $mountpoint = PVE::LXC::Config->parse_ct_rootfs($conf->{rootfs}); - $raw .= "lxc.rootfs = $dir/rootfs\n"; + $raw .= "lxc.rootfs.path = $dir/rootfs\n"; - my $netcount = 0; foreach my $k (sort keys %$conf) { next if $k !~ m/^net(\d+)$/; my $ind = $1; my $d = PVE::LXC::Config->parse_lxc_network($conf->{$k}); - $netcount++; - $raw .= "lxc.network.type = veth\n"; - $raw .= "lxc.network.veth.pair = veth${vmid}i${ind}\n"; - $raw .= "lxc.network.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr}); - $raw .= "lxc.network.name = $d->{name}\n" if defined($d->{name}); - $raw .= "lxc.network.mtu = $d->{mtu}\n" if defined($d->{mtu}); + $raw .= "lxc.net.$ind.type = veth\n"; + $raw .= "lxc.net.$ind.veth.pair = veth${vmid}i${ind}\n"; + $raw .= "lxc.net.$ind.hwaddr = $d->{hwaddr}\n" if defined($d->{hwaddr}); + $raw .= "lxc.net.$ind.name = $d->{name}\n" if defined($d->{name}); + $raw .= "lxc.net.$ind.mtu = $d->{mtu}\n" if defined($d->{mtu}); } + my $had_cpuset = 0; if (my $lxcconf = $conf->{lxc}) { foreach my $entry (@$lxcconf) { my ($k, $v) = @$entry; - $netcount++ if $k eq 'lxc.network.type'; + $had_cpuset = 1 if $k eq 'lxc.cgroup.cpuset.cpus'; $raw .= "$k = $v\n"; } } - $raw .= "lxc.network.type = empty\n" if !$netcount; + my $cores = $conf->{cores}; + if (!$had_cpuset && $cores) { + my $cpuset = eval { PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus') }; + $cpuset = PVE::CpuSet->new_from_cgroup('', 'effective_cpus') if !$cpuset; + my @members = $cpuset->members(); + while (scalar(@members) > $cores) { + my $randidx = int(rand(scalar(@members))); + $cpuset->delete($members[$randidx]); + splice(@members, $randidx, 1); # keep track of the changes + } + $raw .= "lxc.cgroup.cpuset.cpus = ".$cpuset->short_string()."\n"; + } File::Path::mkpath("$dir/rootfs"); @@ -881,7 +889,8 @@ sub check_ct_modify_config_perm { return if $delete; my $data = $opt eq 'rootfs' ? PVE::LXC::Config->parse_ct_rootfs($newconf->{$opt}) : PVE::LXC::Config->parse_ct_mountpoint($newconf->{$opt}); - raise_perm_exc("mountpoint type $data->{type}") if $data->{type} ne 'volume'; + raise_perm_exc("mount point type $data->{type} is only allowed for root\@pam") + if $data->{type} ne 'volume'; } elsif ($opt eq 'memory' || $opt eq 'swap') { $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']); } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' || @@ -1035,7 +1044,7 @@ sub walk_tree_nofollow($$$) { if (!$next) { # failed, check for symlinks and try to create the path - die "symlink encountered at: $dir\n" if $! == ELOOP; + die "symlink encountered at: $dir\n" if $! == ELOOP || $! == ENOTDIR; die "cannot open directory $dir: $!\n" if !$mkdir; # We don't check for errors on mkdirat() here and just try to @@ -1085,9 +1094,8 @@ sub __bindmount_verify { die "failed to open mount point: $!\n" if !$destdh; if ($ro) { my $dot = '.'; - # 269: faccessat() # no separate function because 99% of the time it's the wrong thing to use. - if (syscall(269, fileno($destdh), $dot, &POSIX::W_OK, 0) != -1) { + if (syscall(PVE::Syscall::faccessat, fileno($destdh), $dot, &POSIX::W_OK, 0) != -1) { die "failed to mark bind mount read only\n"; } die "read-only check failed: $!\n" if $! != EROFS; @@ -1320,6 +1328,7 @@ sub destroy_disks { } } +our $NEW_DISK_RE = qr/^([^:\s]+):(\d+(\.\d+)?)$/; sub create_disks { my ($storecfg, $vmid, $settings, $conf) = @_; @@ -1337,7 +1346,7 @@ sub create_disks { my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1); - if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) { + if ($storage && ($volid =~ $NEW_DISK_RE)) { my ($storeid, $size_gb) = ($1, $2); my $size_kb = int(${size_gb}*1024) * 1024; @@ -1468,7 +1477,8 @@ sub parse_id_maps { my $lxc = $conf->{lxc}; foreach my $entry (@$lxc) { my ($key, $value) = @$entry; - next if $key ne 'lxc.id_map'; + # FIXME: remove the 'id_map' variant when lxc-3.0 arrives + next if $key ne 'lxc.idmap' && $key ne 'lxc.id_map'; if ($value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/) { my ($type, $ct, $host, $length) = ($1, $2, $3, $4); push @$id_map, [$type, $ct, $host, $length]; @@ -1477,7 +1487,7 @@ sub parse_id_maps { $rootgid = $host if $type eq 'g'; } } else { - die "failed to parse id_map: $value\n"; + die "failed to parse idmap: $value\n"; } }