use POSIX qw(EINTR);
use File::Path;
+use File::Spec;
+use Cwd qw();
use Fcntl ':flock';
use PVE::Cluster qw(cfs_register_file cfs_read_file);
use PVE::Network;
use PVE::AccessControl;
use PVE::ProcFSTools;
+use Time::HiRes qw (gettimeofday);
use Data::Dumper;
my $nodename = PVE::INotify::nodename();
-cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
-
-PVE::JSONSchema::register_format('pve-lxc-network', \&verify_lxc_network);
-sub verify_lxc_network {
- my ($value, $noerr) = @_;
-
- return $value if parse_lxc_network($value);
-
- return undef if $noerr;
-
- die "unable to parse network setting\n";
-}
+my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
-PVE::JSONSchema::register_format('pve-ct-mountpoint', \&verify_ct_mountpoint);
-sub verify_ct_mountpoint {
- my ($value, $noerr) = @_;
+# Shared list of tar command-line flags: sparse-file handling, numeric owner
+# ids, ACLs, and extended attributes restricted to the user.* and
+# security.capability patterns, with the xattr-write warning switched off.
+our $COMMON_TAR_FLAGS = [ '--sparse', '--numeric-owner', '--acls',
+ '--xattrs',
+ '--xattrs-include=user.*',
+ '--xattrs-include=security.capability',
+ '--warning=no-xattr-write' ];
- return $value if parse_ct_mountpoint($value);
-
- return undef if $noerr;
+cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
- die "unable to parse CT mountpoint options\n";
-}
+# Schema of the rootfs property string.  'volume' is flagged as the default
+# key; 'backup' and 'size' are optional, and 'size' is described as a
+# read-only value.
+my $rootfs_desc = {
+ volume => {
+ type => 'string',
+ default_key => 1,
+ format_description => 'volume',
+ description => 'Volume, device or directory to mount into the container.',
+ },
+ backup => {
+ type => 'boolean',
+ format_description => '[1|0]',
+ description => 'Whether to include the mountpoint in backups.',
+ optional => 1,
+ },
+ size => {
+ type => 'string',
+ format => 'disk-size',
+ format_description => 'DiskSize',
+ description => 'Volume size (read only value).',
+ optional => 1,
+ },
+};
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
- type => 'string', format => 'pve-ct-mountpoint',
- typetext => '[volume=]volume,] [,backup=yes|no] [,size=\d+]',
+ type => 'string', format => $rootfs_desc,
description => "Use volume as container root.",
optional => 1,
});
hostname => {
optional => 1,
description => "Set a host name for the container.",
- type => 'string',
+ type => 'string', format => 'dns-name',
maxLength => 255,
},
description => {
},
searchdomain => {
optional => 1,
- type => 'string',
+ type => 'string', format => 'dns-name-list',
description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
},
nameserver => {
optional => 1,
- type => 'string',
+ type => 'string', format => 'address-list',
description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
},
rootfs => get_standard_option('pve-ct-rootfs'),
protection => {
optional => 1,
type => 'boolean',
- description => "Sets the protection flag of the container. This will prevent the remove operation.",
+ description => "Sets the protection flag of the container. This will prevent the remove operation. This will prevent the CT or CT's disk remove/update operation.",
+ default => 0,
+ },
+ unprivileged => {
+ optional => 1,
+ type => 'boolean',
+ description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
default => 0,
},
};
'lxc.hook.pre-mount' => 1,
'lxc.hook.mount' => 1,
'lxc.hook.start' => 1,
+ 'lxc.hook.stop' => 1,
'lxc.hook.post-stop' => 1,
'lxc.hook.clone' => 1,
'lxc.hook.destroy' => 1,
'lxc.' => 1,
};
+# Schema describing the keys accepted in a net<N> property string.  It is
+# used as the format of every net<N> option and is also registered under the
+# format name 'pve-lxc-network'.  Only 'name' is mandatory.
+my $netconf_desc = {
+    type => {
+        type => 'string',
+        optional => 1,
+        description => "Network interface type.",
+        enum => [qw(veth)],
+    },
+    name => {
+        type => 'string',
+        format_description => 'String',
+        description => 'Name of the network device as seen from inside the container. (lxc.network.name)',
+        pattern => '[-_.\w\d]+',
+    },
+    bridge => {
+        type => 'string',
+        format_description => 'vmbr<Number>',
+        description => 'Bridge to attach the network device to.',
+        pattern => '[-_.\w\d]+',
+        optional => 1,
+    },
+    hwaddr => {
+        type => 'string',
+        format_description => 'MAC',
+        # This text used to be a copy of the 'bridge' description.
+        description => 'MAC address of the network device. (lxc.network.hwaddr)',
+        pattern => qr/(?:[a-f0-9]{2}:){5}[a-f0-9]{2}/i,
+        optional => 1,
+    },
+    mtu => {
+        type => 'integer',
+        format_description => 'Number',
+        description => 'Maximum transfer unit of the interface. (lxc.network.mtu)',
+        minimum => 64, # minimum ethernet frame is 64 bytes
+        optional => 1,
+    },
+    ip => {
+        type => 'string',
+        format => 'pve-ipv4-config',
+        format_description => 'IPv4Format/CIDR',
+        description => 'IPv4 address in CIDR format.',
+        optional => 1,
+    },
+    gw => {
+        type => 'string',
+        format => 'ipv4',
+        format_description => 'GatewayIPv4',
+        description => 'Default gateway for IPv4 traffic.',
+        optional => 1,
+    },
+    ip6 => {
+        type => 'string',
+        format => 'pve-ipv6-config',
+        format_description => 'IPv6Format/CIDR',
+        description => 'IPv6 address in CIDR format.',
+        optional => 1,
+    },
+    gw6 => {
+        type => 'string',
+        format => 'ipv6',
+        format_description => 'GatewayIPv6',
+        description => 'Default gateway for IPv6 traffic.',
+        optional => 1,
+    },
+    firewall => {
+        type => 'boolean',
+        format_description => '[1|0]',
+        description => "Controls whether this interface's firewall rules should be used.",
+        optional => 1,
+    },
+    tag => {
+        type => 'integer',
+        format_description => 'VlanNo',
+        minimum => '2',
+        maximum => '4094',
+        description => "VLAN tag for this interface.",
+        optional => 1,
+    },
+};
+PVE::JSONSchema::register_format('pve-lxc-network', $netconf_desc);
+
my $MAX_LXC_NETWORKS = 10;
for (my $i = 0; $i < $MAX_LXC_NETWORKS; $i++) {
$confdesc->{"net$i"} = {
optional => 1,
- type => 'string', format => 'pve-lxc-network',
- description => "Specifies network interfaces for the container.\n\n".
- "The string should have the follow format:\n\n".
- "-net<[0-9]> bridge=<vmbr<Nummber>>[,hwaddr=<MAC>]\n".
- "[,mtu=<Number>][,name=<String>][,ip=<IPv4Format/CIDR>]\n".
- ",ip6=<IPv6Format/CIDR>][,gw=<GatwayIPv4>]\n".
- ",gw6=<GatwayIPv6>][,firewall=<[1|0]>][,tag=<VlanNo>]",
+ type => 'string', format => $netconf_desc,
+ description => "Specifies network interfaces for the container.",
};
}
+# Mount point schema: every key of $rootfs_desc plus an optional 'mp' entry
+# holding the path inside the container.
+my $mp_desc = {
+ %$rootfs_desc,
+ mp => {
+ type => 'string',
+ format_description => 'Path',
+ description => 'Path to the mountpoint as seen from inside the container.',
+ optional => 1,
+ },
+};
+PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
+
+# Option schema shared by all unused<N> entries: an optional string in the
+# 'pve-volume-id' format.
+my $unuseddesc = {
+ optional => 1,
+ type => 'string', format => 'pve-volume-id',
+ description => "Reference to unused volumes.",
+};
+
my $MAX_MOUNT_POINTS = 10;
for (my $i = 0; $i < $MAX_MOUNT_POINTS; $i++) {
$confdesc->{"mp$i"} = {
optional => 1,
- type => 'string', format => 'pve-ct-mountpoint',
- typetext => '[volume=]volume,] [,backup=yes|no] [,size=\d+] [,mp=mountpoint]',
+ type => 'string', format => $mp_desc,
description => "Use volume as container mount point (experimental feature).",
optional => 1,
};
}
+# Register one "unused<N>" option per slot.  All slots share the same
+# $unuseddesc hash.  The loop is bounded by $MAX_UNUSED_DISKS so that the
+# registered keys always agree with the slots add_unused_volume() scans.
+my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
+for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
+    $confdesc->{"unused$i"} = $unuseddesc;
+}
+
sub write_pct_config {
my ($filename, $conf) = @_;
foreach my $key (sort keys %$conf) {
next if $key eq 'digest' || $key eq 'description' || $key eq 'pending' ||
$key eq 'snapshots' || $key eq 'snapname' || $key eq 'lxc';
- $raw .= "$key: $conf->{$key}\n";
+ my $value = $conf->{$key};
+ die "detected invalid newline inside property '$key'\n" if $value =~ m/\n/;
+ $raw .= "$key: $value\n";
}
if (my $lxcconf = $conf->{lxc}) {
next;
}
- if ($line =~ m/^(lxc\.[a-z0-9_\.]+)(:|\s*=)\s*(.*?)\s*$/) {
+ if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
my $key = $1;
my $value = $3;
if ($valid_lxc_conf_keys->{$key} || $key =~ m/^lxc\.cgroup\./) {
$descr .= PVE::Tools::decode_text($2);
} elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) {
$conf->{snapstate} = $1;
- } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S+)\s*$/) {
+ } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(\S.*)\s*$/) {
my $key = $1;
my $value = $2;
eval { $value = check_type($key, $value); };
}
+# Return the result of PVE::Tools::df for the root directory of process
+# $pid, addressed as /proc/$pid/root/.  $vmid is accepted but not used in
+# the body.  The second df argument (1) is presumably a timeout in seconds -
+# TODO confirm against PVE::Tools::df.
sub get_container_disk_usage {
+ my ($vmid, $pid) = @_;
+
+ return PVE::Tools::df("/proc/$pid/root/", 1);
+}
+
+my $last_proc_vmid_stat;
+
+# Read the 'cpuacct.stat' value of the container's 'cpuacct' cgroup and
+# return a hash ref with the keys 'utime' and 'stime' taken from its
+# "user <n>" and "system <n>" lines.  The hash is empty when the value does
+# not start with those two lines.
+my $parse_cpuacct_stat = sub {
my ($vmid) = @_;
- my $cmd = ['lxc-attach', '-n', $vmid, '--', 'df', '-P', '-B', '1', '/'];
+ my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);
- my $res = {
- total => 0,
- used => 0,
- avail => 0,
- };
+ my $stat = {};
- my $parser = sub {
- my $line = shift;
- if (my ($fsid, $total, $used, $avail) = $line =~
- m/^(\S+.*)\s+(\d+)\s+(\d+)\s+(\d+)\s+\d+%\s.*$/) {
- $res = {
- total => $total,
- used => $used,
- avail => $avail,
- };
- }
- };
- eval { PVE::Tools::run_command($cmd, timeout => 1, outfunc => $parser); };
- warn $@ if $@;
+ if ($raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
- return $res;
-}
+ $stat->{utime} = $1;
+ $stat->{stime} = $2;
+
+ }
+
+ return $stat;
+};
sub vmstatus {
my ($opt_vmid) = @_;
my $active_hash = list_active_containers();
+ my $cpucount = $cpuinfo->{cpus} || 1;
+
+ my $cdtime = gettimeofday;
+
+ my $uptime = (PVE::ProcFSTools::read_proc_uptime(1))[0];
+
foreach my $vmid (keys %$list) {
my $d = $list->{$vmid};
- my $running = defined($active_hash->{$vmid});
+ eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
+ warn $@ if $@; # ignore errors (consider them stopped)
- $d->{status} = $running ? 'running' : 'stopped';
+ $d->{status} = $d->{pid} ? 'running' : 'stopped';
my $cfspath = cfs_config_path($vmid);
my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};
$d->{name} = $conf->{'hostname'} || "CT$vmid";
$d->{name} =~ s/[\s]//g;
- $d->{cpus} = $conf->{cpulimit} // 0;
+ $d->{cpus} = $conf->{cpulimit} || $cpucount;
- if ($running) {
- my $res = get_container_disk_usage($vmid);
+ if ($d->{pid}) {
+ my $res = get_container_disk_usage($vmid, $d->{pid});
$d->{disk} = $res->{used};
$d->{maxdisk} = $res->{total};
} else {
foreach my $vmid (keys %$list) {
my $d = $list->{$vmid};
- next if $d->{status} ne 'running';
+ my $pid = $d->{pid};
+
+ next if !$pid; # skip stopped CTs
- $d->{uptime} = 100; # fixme:
+ my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
+ $d->{uptime} = time - $ctime; # the method lxcfs uses
$d->{mem} = read_cgroup_value('memory', $vmid, 'memory.usage_in_bytes');
$d->{swap} = read_cgroup_value('memory', $vmid, 'memory.memsw.usage_in_bytes') - $d->{mem};
$d->{diskwrite} = $2 if $key eq 'Write';
}
}
- }
- return $list;
-}
+ my $pstat = &$parse_cpuacct_stat($vmid);
-my $parse_size = sub {
- my ($value) = @_;
+ my $used = $pstat->{utime} + $pstat->{stime};
- return undef if $value !~ m/^(\d+(\.\d+)?)([KMG])?$/;
- my ($size, $unit) = ($1, $3);
- if ($unit) {
- if ($unit eq 'K') {
- $size = $size * 1024;
- } elsif ($unit eq 'M') {
- $size = $size * 1024 * 1024;
- } elsif ($unit eq 'G') {
- $size = $size * 1024 * 1024 * 1024;
+ my $old = $last_proc_vmid_stat->{$vmid};
+ if (!$old) {
+ $last_proc_vmid_stat->{$vmid} = {
+ time => $cdtime,
+ used => $used,
+ cpu => 0,
+ };
+ next;
+ }
+
+ my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};
+
+ if ($dtime > 1000) {
+ my $dutime = $used - $old->{used};
+
+ $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
+ $last_proc_vmid_stat->{$vmid} = {
+ time => $cdtime,
+ used => $used,
+ cpu => $d->{cpu},
+ };
+ } else {
+ $d->{cpu} = $old->{cpu};
}
}
- return int($size);
-};
-my $format_size = sub {
- my ($size) = @_;
+ my $netdev = PVE::ProcFSTools::read_proc_net_dev();
- $size = int($size);
+ foreach my $dev (keys %$netdev) {
+ next if $dev !~ m/^veth([1-9]\d*)i/;
+ my $vmid = $1;
+ my $d = $list->{$vmid};
- my $kb = int($size/1024);
- return $size if $kb*1024 != $size;
+ next if !$d;
- my $mb = int($kb/1024);
- return "${kb}K" if $mb*1024 != $kb;
+ $d->{netout} += $netdev->{$dev}->{receive};
+ $d->{netin} += $netdev->{$dev}->{transmit};
- my $gb = int($mb/1024);
- return "${mb}M" if $gb*1024 != $mb;
+ }
- return "${gb}G";
-};
+ return $list;
+}
+
+# Classify a mount point source string:
+#   'device' - starts with /dev/
+#   'bind'   - any other string starting with a slash
+#   'volume' - everything else (no leading slash)
+sub classify_mountpoint {
+ my ($vol) = @_;
+ if ($vol =~ m!^/!) {
+ return 'device' if $vol =~ m!^/dev/!;
+ return 'bind';
+ }
+ return 'volume';
+}
+# Parse a mount point property string against $mp_desc.
+#
+# Returns a hash ref with the parsed keys, with 'size' converted by
+# PVE::JSONSchema::parse_size and an extra 'type' key set by
+# classify_mountpoint().  On a parse error the function dies, or returns
+# undef when $noerr is true.
sub parse_ct_mountpoint {
- my ($data) = @_;
+ my ($data, $noerr) = @_;
$data //= '';
- my $res = {};
-
- foreach my $p (split (/,/, $data)) {
- next if $p =~ m/^\s*$/;
+ my $res;
+ eval { $res = PVE::JSONSchema::parse_property_string($mp_desc, $data) };
+ if ($@) {
+ return undef if $noerr;
+ die $@;
+ }
- if ($p =~ m/^(volume|backup|size|mp)=(.+)$/) {
- my ($k, $v) = ($1, $2);
- return undef if defined($res->{$k});
- $res->{$k} = $v;
- } else {
- if (!$res->{volume} && $p !~ m/=/) {
- $res->{volume} = $p;
- } else {
- return undef;
- }
+ if (defined(my $size = $res->{size})) {
+ # keep the raw text in $size so the error message can still show it
+ my $parsed_size = PVE::JSONSchema::parse_size($size);
+ if (!defined($parsed_size)) {
+ return undef if $noerr;
+ die "invalid size: $size\n";
}
+ $res->{size} = $parsed_size;
}
- return undef if !defined($res->{volume});
-
- return undef if $res->{backup} && $res->{backup} !~ m/^(yes|no)$/;
-
- if ($res->{size}) {
- return undef if !defined($res->{size} = &$parse_size($res->{size}));
- }
+ $res->{type} = classify_mountpoint($res->{volume});
return $res;
}
+# Serialize a mount point hash into its property string using
+# PVE::JSONSchema::print_property_string and the $mp_desc schema.  The 'type'
+# key is always put on the skip list; 'mp' is added to it when $nomp is true.
sub print_ct_mountpoint {
my ($info, $nomp) = @_;
-
- my $opts = '';
-
- die "missing volume\n" if !$info->{volume};
-
- foreach my $o (qw(backup)) {
- $opts .= ",$o=$info->{$o}" if defined($info->{$o});
- }
-
- if ($info->{size}) {
- $opts .= ",size=" . &$format_size($info->{size});
- }
-
- $opts .= ",mp=$info->{mp}" if !$nomp;
-
- return "$info->{volume}$opts";
+ my $skip = [ 'type' ];
+ push @$skip, 'mp' if $nomp;
+ return PVE::JSONSchema::print_property_string($info, $mp_desc, $skip);
}
+# Serialize a network hash into its property string using
+# PVE::JSONSchema::print_property_string and the $netconf_desc schema.
sub print_lxc_network {
my $net = shift;
-
- die "no network name defined\n" if !$net->{name};
-
- my $res = "name=$net->{name}";
-
- foreach my $k (qw(hwaddr mtu bridge ip gw ip6 gw6 firewall tag)) {
- next if !defined($net->{$k});
- $res .= ",$k=$net->{$k}";
- }
-
- return $res;
+ return PVE::JSONSchema::print_property_string($net, $netconf_desc);
}
sub parse_lxc_network {
return $res if !$data;
- foreach my $pv (split (/,/, $data)) {
- if ($pv =~ m/^(bridge|hwaddr|mtu|name|ip|ip6|gw|gw6|firewall|tag)=(\S+)$/) {
- $res->{$1} = $2;
- } else {
- return undef;
- }
- }
+ $res = PVE::JSONSchema::parse_property_string($netconf_desc, $data);
$res->{type} = 'veth';
$res->{hwaddr} = PVE::Tools::random_ether_addr() if !$res->{hwaddr};
my $line = shift;
$pid = $1 if $line =~ m/^PID:\s+(\d+)$/;
};
- PVE::Tools::run_command(['lxc-info', '-n', $vmid], outfunc => $parser);
+ PVE::Tools::run_command(['lxc-info', '-n', $vmid, '-p'], outfunc => $parser);
die "unable to get PID for CT $vmid (not running?)\n" if !$pid;
return $pid;
}
-my $ipv4_reverse_mask = [
- '0.0.0.0',
- '128.0.0.0',
- '192.0.0.0',
- '224.0.0.0',
- '240.0.0.0',
- '248.0.0.0',
- '252.0.0.0',
- '254.0.0.0',
- '255.0.0.0',
- '255.128.0.0',
- '255.192.0.0',
- '255.224.0.0',
- '255.240.0.0',
- '255.248.0.0',
- '255.252.0.0',
- '255.254.0.0',
- '255.255.0.0',
- '255.255.128.0',
- '255.255.192.0',
- '255.255.224.0',
- '255.255.240.0',
- '255.255.248.0',
- '255.255.252.0',
- '255.255.254.0',
- '255.255.255.0',
- '255.255.255.128',
- '255.255.255.192',
- '255.255.255.224',
- '255.255.255.240',
- '255.255.255.248',
- '255.255.255.252',
- '255.255.255.254',
- '255.255.255.255',
-];
-
# Note: we cannot use Net::IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
my ($cidr, $noerr) = @_;
- if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$! && ($2 > 7) && ($2 < 32)) {
- return { address => $1, netmask => $ipv4_reverse_mask->[$2] };
+ if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$! && ($2 > 7) && ($2 <= 32)) {
+ return { address => $1, netmask => $PVE::Network::ipv4_reverse_mask->[$2] };
}
return undef if $noerr;
die "VM is locked ($conf->{'lock'})\n" if $conf->{'lock'};
}
+# Die with "$err_msg - protection mode enabled" when the given config has a
+# true 'protection' value; otherwise do nothing.
+sub check_protection {
+ my ($vm_conf, $err_msg) = @_;
+
+ if ($vm_conf->{protection}) {
+ die "$err_msg - protection mode enabled\n";
+ }
+}
+
+
sub update_lxc_config {
my ($storage_cfg, $vmid, $conf) = @_;
die "missing 'arch' - internal error" if !$conf->{arch};
$raw .= "lxc.arch = $conf->{arch}\n";
+ my $unprivileged = $conf->{unprivileged};
+ my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};
+
my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
if ($ostype =~ /^(?:debian | ubuntu | centos | archlinux)$/x) {
$raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
+ if ($unprivileged || $custom_idmap) {
+ $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
+ }
} else {
die "implement me";
}
+ $raw .= "lxc.start.unshare = 1\n";
+
+ # Should we read them from /etc/subuid?
+ if ($unprivileged && !$custom_idmap) {
+ $raw .= "lxc.id_map = u 0 100000 65536\n";
+ $raw .= "lxc.id_map = g 0 100000 65536\n";
+ }
+
if (!has_dev_console($conf)) {
$raw .= "lxc.console = none\n";
$raw .= "lxc.cgroup.devices.deny = c 5:1 rwm\n";
my $ttycount = get_tty_count($conf);
$raw .= "lxc.tty = $ttycount\n";
+ # some init scripts expect a linux terminal (turnkey).
+ $raw .= "lxc.environment = TERM=linux\n";
+
my $utsname = $conf->{hostname} || "CT$vmid";
$raw .= "lxc.utsname = $utsname\n";
my $lxcmem = int($memory*1024*1024);
$raw .= "lxc.cgroup.memory.limit_in_bytes = $lxcmem\n";
+ $raw .= "lxc.cgroup.memory.kmem.limit_in_bytes = $lxcmem\n";
my $lxcswap = int(($memory + $swap)*1024*1024);
$raw .= "lxc.cgroup.memory.memsw.limit_in_bytes = $lxcswap\n";
my $mountpoint = parse_ct_mountpoint($conf->{rootfs});
$mountpoint->{mp} = '/';
-
- my ($path, $use_loopdev) = mountpoint_mount_path($mountpoint, $storage_cfg);
- $path = "loop:$path" if $use_loopdev;
- $raw .= "lxc.rootfs = $path\n";
+ $raw .= "lxc.rootfs = $dir/rootfs\n";
+ $raw .= "lxc.hook.stop = /usr/lib/x86_64-linux-gnu/lxc/hooks/unmount-namespace\n";
my $netcount = 0;
foreach my $k (keys %$conf) {
return join(' ', @list);
}
+# Record $volid under the lowest free "unused<N>" key of $config.
+#
+# Returns the key that was used, or nothing when an unused<N> entry seen
+# during the scan already holds $volid.  Dies when no free slot is left.
+sub add_unused_volume {
+    my ($config, $volid) = @_;
+
+    my $key;
+    # Scan from the highest slot down so $key ends up as the lowest free one.
+    for (my $ind = $MAX_UNUSED_DISKS - 1; $ind >= 0; $ind--) {
+        my $test = "unused$ind";
+        if (my $vid = $config->{$test}) {
+            return if $vid eq $volid; # do not add duplicates
+        } else {
+            $key = $test;
+        }
+    }
+
+    die "Too many unused volumes - please delete them first.\n" if !$key;
+
+    $config->{$key} = $volid;
+
+    return $key;
+}
+
+
sub update_pct_config {
my ($vmid, $conf, $running, $param, $delete) = @_;
my @nohotplug;
my $new_disks = 0;
+ my @deleted_volumes;
my $rootdir;
if ($running) {
$rootdir = "/proc/$pid/root";
}
+ my $hotplug_error = sub {
+ if ($running) {
+ push @nohotplug, @_;
+ return 1;
+ } else {
+ return 0;
+ }
+ };
+
if (defined($delete)) {
foreach my $opt (@$delete) {
+ if (!exists($conf->{$opt})) {
+ warn "no such option: $opt\n";
+ next;
+ }
+
if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
die "unable to delete required option '$opt'\n";
} elsif ($opt eq 'swap') {
delete $conf->{$opt};
} elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' ||
$opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
+ next if $hotplug_error->($opt);
delete $conf->{$opt};
- push @nohotplug, $opt;
- next if $running;
} elsif ($opt =~ m/^net(\d)$/) {
delete $conf->{$opt};
next if !$running;
PVE::Network::veth_delete("veth${vmid}i$netid");
} elsif ($opt eq 'protection') {
delete $conf->{$opt};
+ } elsif ($opt =~ m/^unused(\d+)$/) {
+ next if $hotplug_error->($opt);
+ check_protection($conf, "can't remove CT $vmid drive '$opt'");
+ push @deleted_volumes, $conf->{$opt};
+ delete $conf->{$opt};
} elsif ($opt =~ m/^mp(\d+)$/) {
+ next if $hotplug_error->($opt);
+ check_protection($conf, "can't remove CT $vmid drive '$opt'");
+ my $mountpoint = parse_ct_mountpoint($conf->{$opt});
+ if ($mountpoint->{type} eq 'volume') {
+ add_unused_volume($conf, $mountpoint->{volume})
+ }
delete $conf->{$opt};
- push @nohotplug, $opt;
- next if $running;
- } elsif ($opt eq 'rootfs') {
- die "implement me"
+ } elsif ($opt eq 'unprivileged') {
+ die "unable to delete read-only option: '$opt'\n";
} else {
die "implement me"
}
} elsif ($opt eq 'startup') {
$conf->{$opt} = $value;
} elsif ($opt eq 'tty' || $opt eq 'console' || $opt eq 'cmode') {
+ next if $hotplug_error->($opt);
$conf->{$opt} = $value;
- push @nohotplug, $opt;
- next if $running;
} elsif ($opt eq 'nameserver') {
+ next if $hotplug_error->($opt);
my $list = verify_nameserver_list($value);
$conf->{$opt} = $list;
- push @nohotplug, $opt;
- next if $running;
} elsif ($opt eq 'searchdomain') {
+ next if $hotplug_error->($opt);
my $list = verify_searchdomain_list($value);
$conf->{$opt} = $list;
- push @nohotplug, $opt;
- next if $running;
} elsif ($opt eq 'cpulimit') {
+ next if $hotplug_error->($opt); # FIXME: hotplug
$conf->{$opt} = $value;
- push @nohotplug, $opt; # fixme: hotplug
- next;
} elsif ($opt eq 'cpuunits') {
$conf->{$opt} = $value;
write_cgroup_value("cpu", $vmid, "cpu.shares", $value);
} elsif ($opt eq 'protection') {
$conf->{$opt} = $value ? 1 : 0;
} elsif ($opt =~ m/^mp(\d+)$/) {
+ next if $hotplug_error->($opt);
+ check_protection($conf, "can't update CT $vmid drive '$opt'");
$conf->{$opt} = $value;
$new_disks = 1;
- push @nohotplug, $opt;
- next;
} elsif ($opt eq 'rootfs') {
+ check_protection($conf, "can't update CT $vmid drive '$opt'");
die "implement me: $opt";
+ } elsif ($opt eq 'unprivileged') {
+ die "unable to modify read-only option: '$opt'\n";
} else {
die "implement me: $opt";
}
write_config($vmid, $conf) if $running;
}
- if ($running && scalar(@nohotplug)) {
- die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
+ if (@deleted_volumes) {
+ my $storage_cfg = PVE::Storage::config();
+ foreach my $volume (@deleted_volumes) {
+ delete_mountpoint_volume($storage_cfg, $vmid, $volume);
+ }
}
if ($new_disks) {
my $storage_cfg = PVE::Storage::config();
create_disks($storage_cfg, $vmid, $conf, $conf);
}
+
+ # This should be the last thing we do here
+ if ($running && scalar(@nohotplug)) {
+ die "unable to modify " . join(',', @nohotplug) . " while container is running\n";
+ }
}
sub has_dev_console {
}
my $ipv6 = $net->{ip6};
if ($ipv6) {
- if ($ipv6 =~ /^(dhcp|manual)$/) {
+ if ($ipv6 =~ /^(auto|dhcp|manual)$/) {
$ipv6 = undef;
} else {
$ipv6 =~ s!/\d+$!!;
return ($ipv4, $ipv6);
}
+# Free the storage volume behind a mount point.  Sources that
+# classify_mountpoint() does not report as 'volume' are ignored, and the
+# volume is only handed to PVE::Storage::vdisk_free when the owner returned
+# by PVE::Storage::parse_volname equals $vmid.
+sub delete_mountpoint_volume {
+ my ($storage_cfg, $vmid, $volume) = @_;
+
+ return if classify_mountpoint($volume) ne 'volume';
+
+ my ($vtype, $name, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
+ PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
+}
sub destroy_lxc_container {
my ($storage_cfg, $vmid, $conf) = @_;
foreach_mountpoint($conf, sub {
my ($ms, $mountpoint) = @_;
- my ($vtype, $name, $owner) = PVE::Storage::parse_volname($storage_cfg, $mountpoint->{volume});
- PVE::Storage::vdisk_free($storage_cfg, $mountpoint->{volume}) if $vmid == $owner;
+ delete_mountpoint_volume($storage_cfg, $vmid, $mountpoint->{volume});
});
rmdir "/var/lib/lxc/$vmid/rootfs";
return if !$change_ip && !$change_gw;
# step 1: add new IP, if this fails we cancel
- if ($change_ip && $newip && $newip !~ /^(?:auto|dhcp)$/) {
+ my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
+ if ($change_ip && $is_real_ip) {
eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
if (my $err = $@) {
warn $err;
# Note: 'ip route replace' can add
if ($change_gw) {
if ($newgw) {
- eval { &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw); };
+ eval {
+ if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
+ &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
+ }
+ &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
+ };
if (my $err = $@) {
warn $err;
# the route was not replaced, the old IP is still available
my $conf = load_config($vmid);
- my $cmd = "/usr/bin/lxc-freeze -n $vmid";
my $running = check_running($vmid);
eval {
if ($running) {
- PVE::Tools::run_command($cmd);
+ PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
+ PVE::Tools::run_command(['/bin/sync']);
};
my $storecfg = PVE::Storage::config();
my $rootinfo = parse_ct_mountpoint($conf->{rootfs});
my $volid = $rootinfo->{volume};
- $cmd = "/usr/bin/lxc-unfreeze -n $vmid";
if ($running) {
- PVE::Tools::run_command($cmd);
+ PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]);
};
PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
return $reverse ? reverse @names : @names;
}
+# The container might have *different* symlinks than the host. realpath/abs_path
+# use the actual filesystem to resolve links.
+#
+# Normalize a mount point path purely textually, without touching the
+# filesystem.  The result always starts with a slash; repeated slashes and
+# "." components are dropped, and ".." components are collapsed by the regex
+# rules below.  A trailing slash in the input is kept.
+sub sanitize_mountpoint {
+    my ($mp) = @_;
+    $mp = '/' . $mp; # we always start with a slash
+    $mp =~ s@/{2,}@/@g; # collapse sequences of slashes
+    # Drop "/." only when another slash follows.  The lookahead leaves that
+    # slash in place, so "a/./b" becomes "a/b" (not "ab") and runs such as
+    # "/././" are handled in a single pass.
+    $mp =~ s@/\.(?=/)@@g; # collapse /./
+    $mp =~ s@/\.(/?)$@$1@; # collapse a trailing /. or /./
+    $mp =~ s@(.*)/[^/]+/\.\./@$1/@g; # collapse /../ without regard for symlinks
+    $mp =~ s@/\.\.(/?)$@$1@; # collapse trailing /.. or /../ disregarding symlinks
+    return $mp;
+}
+
+
+# Call $func->($key, $mountpoint) for each key returned by
+# mountpoint_names($reverse) that is defined in $conf.  Values that fail to
+# parse are skipped silently (parse_ct_mountpoint is called with $noerr set).
+# The in-container path and, for sources starting with a slash, the volume
+# path are passed through sanitize_mountpoint before the callback runs.
sub foreach_mountpoint_full {
my ($conf, $reverse, $func) = @_;
foreach my $key (mountpoint_names($reverse)) {
my $value = $conf->{$key};
next if !defined($value);
- my $mountpoint = parse_ct_mountpoint($value);
- $mountpoint->{mp} = '/' if $key eq 'rootfs'; # just to be sure
+ my $mountpoint = parse_ct_mountpoint($value, 1);
+ next if !defined($mountpoint);
+
+ # just to be sure: rootfs is /
+ my $path = $key eq 'rootfs' ? '/' : $mountpoint->{mp};
+ $mountpoint->{mp} = sanitize_mountpoint($path);
+
+ $path = $mountpoint->{volume};
+ $mountpoint->{volume} = sanitize_mountpoint($path) if $path =~ m|^/|;
+
&$func($key, $mountpoint);
}
}
if ($opt eq 'cpus' || $opt eq 'cpuunits' || $opt eq 'cpulimit') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.CPU']);
- } elsif ($opt eq 'disk') {
+ } elsif ($opt eq 'rootfs' || $opt =~ /^mp\d+$/) {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
} elsif ($opt eq 'memory' || $opt eq 'swap') {
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
return mountpoint_mount($mountpoint, undef, $storage_cfg, $snapname);
}
+# Die if $path, after File::Spec->canonpath, differs from what Cwd::realpath
+# returns for it.  The error message attributes such a difference to a
+# symlink.
+my $check_mount_path = sub {
+ my ($path) = @_;
+ $path = File::Spec->canonpath($path);
+ my $real = Cwd::realpath($path);
+ if ($real ne $path) {
+ die "mount path modified by symlink: $path != $real";
+ }
+};
+
+
+# Run `losetup --associated $path` and return the /dev/loop<N> device named
+# at the start of an output line, or undef when no line matches.  If several
+# lines match, the last one wins.
+sub query_loopdev {
+ my ($path) = @_;
+ my $found;
+ my $parser = sub {
+ my $line = shift;
+ if ($line =~ m@^(/dev/loop\d+):@) {
+ $found = $1;
+ }
+ };
+ my $cmd = ['losetup', '--associated', $path];
+ PVE::Tools::run_command($cmd, outfunc => $parser);
+ return $found;
+}
+
+
# use $rootdir = undef to just return the corresponding mount path
sub mountpoint_mount {
my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;
my $volid = $mountpoint->{volume};
my $mount = $mountpoint->{mp};
+ my $type = $mountpoint->{type};
return if !$volid || !$mount;
$rootdir =~ s!/+$!!;
$mount_path = "$rootdir/$mount";
$mount_path =~ s!/+!/!g;
+ &$check_mount_path($mount_path);
File::Path::mkpath($mount_path);
}
my ($vtype, undef, undef, undef, undef, $isBase, $format) =
PVE::Storage::parse_volname($storage_cfg, $volid);
+ $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files
+
if ($format eq 'subvol') {
if ($mount_path) {
if ($snapname) {
- if ($scfg->{type} eq 'zfspool') {
- my $path_arg = $path;
- $path_arg =~ s!^/+!!;
- PVE::Tools::run_command(['mount', '-o', 'ro', '-t', 'zfs', $path_arg, $mount_path]);
- } else {
+ if ($scfg->{type} ne 'zfspool') {
die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
}
- } else {
- PVE::Tools::run_command(['mount', '-o', 'bind', $path, $mount_path]);
}
+ PVE::Tools::run_command(['mount', '-o', 'bind', $path, $mount_path]);
}
return wantarray ? ($path, 0) : $path;
- } elsif ($format eq 'raw') {
+ } elsif ($format eq 'raw' || $format eq 'iso') {
my $use_loopdev = 0;
my @extra_opts;
if ($scfg->{path}) {
die "unsupported storage type '$scfg->{type}'\n";
}
if ($mount_path) {
- if ($isBase || defined($snapname)) {
- PVE::Tools::run_command(['mount', '-o', "ro", @extra_opts, $path, $mount_path]);
+ if ($format eq 'iso') {
+ PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
+ } elsif ($isBase || defined($snapname)) {
+ PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
} else {
PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
}
} else {
die "unsupported image format '$format'\n";
}
- } elsif ($volid =~ m|^/dev/.+|) {
+ } elsif ($type eq 'device') {
PVE::Tools::run_command(['mount', $volid, $mount_path]) if $mount_path;
return wantarray ? ($volid, 0) : $volid;
- } elsif ($volid !~ m|^/dev/.+| && $volid =~ m|^/.+| && -d $volid) {
+ } elsif ($type eq 'bind' && -d $volid) {
+ &$check_mount_path($volid);
PVE::Tools::run_command(['mount', '-o', 'bind', $volid, $mount_path]) if $mount_path;
return wantarray ? ($volid, 0) : $volid;
}
my $volid = $mountpoint->{volume};
- return if !$volid || $volid =~ m|^/|;
+ return if !$volid || $mountpoint->{type} ne 'volume';
my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
return if !$sid;
}
+# Run mkfs.ext4 on $dev with the 'mmp' feature and a "root_owner" extended
+# option built from $rootuid:$rootgid.  Both ids are optional and default to
+# 0, so a caller that passes only the device yields "root_owner=0:0" rather
+# than the malformed "root_owner=:".
sub mkfs {
- my ($dev) = @_;
+ my ($dev, $rootuid, $rootgid) = @_;
+ $rootuid //= 0;
+ $rootgid //= 0;
- PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp', $dev]);
+ PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
+ '-E', "root_owner=$rootuid:$rootgid",
+ $dev]);
}
sub format_disk {
- my ($storage_cfg, $volid) = @_;
+ my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;
if ($volid =~ m!^/dev/.+!) {
mkfs($volid);
die "cannot format volume '$volid' (format == $format)\n"
if $format ne 'raw';
- mkfs($path);
+ mkfs($path, $rootuid, $rootgid);
}
sub destroy_disks {
my $vollist = [];
eval {
+ my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+ my $chown_vollist = [];
+
foreach_mountpoint($settings, sub {
my ($ms, $mountpoint) = @_;
if ($size_kb > 0) {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw',
undef, $size_kb);
- format_disk($storecfg, $volid);
+ format_disk($storecfg, $volid, $rootuid, $rootgid);
} else {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'subvol',
undef, 0);
+ push @$chown_vollist, $volid;
}
} elsif ($scfg->{type} eq 'zfspool') {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'subvol',
undef, $size_kb);
+ push @$chown_vollist, $volid;
} elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm') {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw', undef, $size_kb);
- format_disk($storecfg, $volid);
+ format_disk($storecfg, $volid, $rootuid, $rootgid);
} elsif ($scfg->{type} eq 'rbd') {
die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw', undef, $size_kb);
- format_disk($storecfg, $volid);
+ format_disk($storecfg, $volid, $rootuid, $rootgid);
} else {
die "unable to create containers on storage type '$scfg->{type}'\n";
}
push @$vollist, $volid;
- my $new_mountpoint = { volume => $volid, size => $size_kb*1024, mp => $mp };
- $conf->{$ms} = print_ct_mountpoint($new_mountpoint, $ms eq 'rootfs');
+ $mountpoint->{volume} = $volid;
+ $mountpoint->{size} = $size_kb * 1024;
+ $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
} else {
# use specified/existing volid
}
});
+
+ PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
+ foreach my $volid (@$chown_vollist) {
+ my $path = PVE::Storage::path($storecfg, $volid, undef);
+ chown($rootuid, $rootgid, $path);
+ }
+ PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
};
# free allocated images on error
if (my $err = $@) {
return &$complete_ctid_full(1);
}
+# Collect the 'lxc.id_map' entries from $conf->{lxc} (a list of [key, value]
+# pairs) and work out which host ids container id 0 maps to.
+#
+# Returns ($id_map, $rootuid, $rootgid).  $id_map is an array ref of
+# [type, container_id, host_id, length] entries.  $rootuid and $rootgid are
+# the host ids of the 'u' and 'g' entries whose container id is 0, or 0 when
+# there is no such entry.  When no entry was found and the config is marked
+# unprivileged, a default mapping 0 -> 100000 of length 65536 is returned for
+# both uids and gids.
+# Dies if an lxc.id_map value does not match "<u|g> <ct> <host> <length>".
+sub parse_id_maps {
+ my ($conf) = @_;
+
+ my $id_map = [];
+ my $rootuid = 0;
+ my $rootgid = 0;
+
+ my $lxc = $conf->{lxc};
+ foreach my $entry (@$lxc) {
+ my ($key, $value) = @$entry;
+ next if $key ne 'lxc.id_map';
+ if ($value =~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/) {
+ my ($type, $ct, $host, $length) = ($1, $2, $3, $4);
+ push @$id_map, [$type, $ct, $host, $length];
+ if ($ct == 0) {
+ $rootuid = $host if $type eq 'u';
+ $rootgid = $host if $type eq 'g';
+ }
+ } else {
+ die "failed to parse id_map: $value\n";
+ }
+ }
+
+ if (!@$id_map && $conf->{unprivileged}) {
+ # Should we read them from /etc/subuid?
+ $id_map = [ ['u', '0', '100000', '65536'],
+ ['g', '0', '100000', '65536'] ];
+ $rootuid = $rootgid = 100000;
+ }
+
+ return ($id_map, $rootuid, $rootgid);
+}
+
+# Build a command prefix from an id map: 'lxc-usernsexec', then one
+# "-m <type>:<ct>:<host>:<length>" argument pair per $id_map entry, then '--'.
+# Returns an empty array ref when $id_map has no entries.
+sub userns_command {
+ my ($id_map) = @_;
+ if (@$id_map) {
+ return ['lxc-usernsexec', (map { ('-m', join(':', @$_)) } @$id_map), '--'];
+ }
+ return [];
+}
+
1;