use PVE::Network;
use PVE::AccessControl;
use PVE::ProcFSTools;
+use Time::HiRes qw (gettimeofday);
use Data::Dumper;
my $nodename = PVE::INotify::nodename();
+my $cpuinfo= PVE::ProcFSTools::read_cpuinfo();
+
+our $COMMON_TAR_FLAGS = [ '--sparse', '--numeric-owner', '--acls',
+ '--xattrs',
+ '--xattrs-include=user.*',
+ '--xattrs-include=security.capability',
+ '--warning=no-xattr-write' ];
+
cfs_register_file('/lxc/', \&parse_pct_config, \&write_pct_config);
my $rootfs_desc = {
volume => {
type => 'string',
default_key => 1,
+ format => 'pve-lxc-mp-string',
format_description => 'volume',
description => 'Volume, device or directory to mount into the container.',
},
optional => 1,
},
size => {
- type => 'disk-size',
+ type => 'string',
+ format => 'disk-size',
format_description => 'DiskSize',
description => 'Volume size (read only value).',
optional => 1,
},
+ acl => {
+ type => 'boolean',
+ format_description => 'acl',
+ description => 'Explicitly enable or disable ACL support.',
+ optional => 1,
+ },
+ ro => {
+ type => 'boolean',
+ format_description => 'ro',
+ description => 'Read-only mountpoint (not supported with bind mounts)',
+ optional => 1,
+ },
};
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
ostype => {
optional => 1,
type => 'string',
- enum => ['debian', 'ubuntu', 'centos', 'archlinux'],
+ enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux'],
description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
},
console => {
cpulimit => {
optional => 1,
type => 'number',
- description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has total of '2' CPU time. Value '0' indicates no CPU limit.",
+ description => "Limit of CPU usage. Note if the computer has 2 CPUs, it has a total of '2' CPU time. Value '0' indicates no CPU limit.",
minimum => 0,
maximum => 128,
default => 0,
cpuunits => {
optional => 1,
type => 'integer',
- description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
+ description => "CPU weight for a VM. Argument is used in the kernel fair scheduler. The larger the number is, the more CPU time this VM gets. Number is relative to the weights of all the other running VMs.\n\nNOTE: You can disable fair-scheduler configuration by setting this to 0.",
minimum => 0,
maximum => 500000,
default => 1024,
searchdomain => {
optional => 1,
type => 'string', format => 'dns-name-list',
- description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
+ description => "Sets DNS search domains for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
},
nameserver => {
optional => 1,
type => 'string', format => 'address-list',
- description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain or nameserver.",
+ description => "Sets DNS server IP address for a container. Create will automatically use the setting from the host if you neither set searchdomain nor nameserver.",
},
rootfs => get_standard_option('pve-ct-rootfs'),
parent => {
protection => {
optional => 1,
type => 'boolean',
- description => "Sets the protection flag of the container. This will prevent the remove operation. This will prevent the CT or CT's disk remove/update operation.",
+ description => "Sets the protection flag of the container. This will prevent the CT or CT's disk remove/update operation.",
+ default => 0,
+ },
+ unprivileged => {
+ optional => 1,
+ type => 'boolean',
+ description => "Makes the container run as unprivileged user. (Should not be modified manually.)",
default => 0,
},
};
'lxc.mount' => 1,
'lxc.mount.entry' => 1,
'lxc.mount.auto' => 1,
- 'lxc.rootfs' => 1,
+ 'lxc.rootfs' => 'lxc.rootfs is auto generated from rootfs',
'lxc.rootfs.mount' => 1,
- 'lxc.rootfs.options' => 1,
+ 'lxc.rootfs.options' => 'lxc.rootfs.options is not supported' .
+ ', please use mountpoint options in the "rootfs" key',
# lxc.cgroup.*
'lxc.cap.drop' => 1,
'lxc.cap.keep' => 1,
'lxc.start.order' => 1,
'lxc.group' => 1,
'lxc.environment' => 1,
- 'lxc.' => 1,
- 'lxc.' => 1,
- 'lxc.' => 1,
- 'lxc.' => 1,
};
my $netconf_desc = {
format_description => 'VlanNo',
minimum => '2',
maximum => '4094',
- description => "VLAN tag foro this interface.",
+ description => "VLAN tag for this interface.",
+ optional => 1,
+ },
+ trunks => {
+ type => 'string',
+ pattern => qr/\d+(?:;\d+)*/,
+ format_description => 'vlanid[;vlanid...]',
+ description => "VLAN ids to pass through the interface",
optional => 1,
},
};
};
}
+PVE::JSONSchema::register_format('pve-lxc-mp-string', \&verify_lxc_mp_string);
+# Format validator registered as 'pve-lxc-mp-string'.
+#
+# Rejects strings that contain a '.' or '..' path component in one of
+# these positions:
+#   - "/./" or "/../" anywhere
+#   - "/." or "/.." at the end
+#   - "../" at the beginning
+#   - the bare strings "." and ".." (no slash at all, so none of the
+#     checks above would catch them)
+#
+# Returns $mp unchanged when it is acceptable. On rejection returns undef
+# if $noerr is true, otherwise dies with a message naming the string.
+sub verify_lxc_mp_string {
+    my ($mp, $noerr) = @_;
+
+    if ($mp =~ m@/\.\.?/@ ||
+        $mp =~ m@/\.\.?$@ ||
+        $mp =~ m@^\.\./@ ||
+        $mp =~ m@^\.\.?$@) {
+        return undef if $noerr;
+        die "$mp contains illegal character sequences\n";
+    }
+
+    return $mp;
+}
+
my $mp_desc = {
%$rootfs_desc,
mp => {
type => 'string',
+ format => 'pve-lxc-mp-string',
format_description => 'Path',
description => 'Path to the mountpoint as seen from inside the container.',
- optional => 1,
},
};
PVE::JSONSchema::register_format('pve-ct-mountpoint', $mp_desc);
if ($line =~ m/^(lxc\.[a-z0-9_\-\.]+)(:|\s*=)\s*(.*?)\s*$/) {
my $key = $1;
my $value = $3;
- if ($valid_lxc_conf_keys->{$key} || $key =~ m/^lxc\.cgroup\./) {
+ my $validity = $valid_lxc_conf_keys->{$key} || 0;
+ if ($validity eq 1 || $key =~ m/^lxc\.cgroup\./) {
push @{$conf->{lxc}}, [$key, $value];
+ } elsif (my $errmsg = $validity) {
+ warn "vm $vmid - $key: $errmsg\n";
} else {
warn "vm $vmid - unable to parse config: $line\n";
}
my $cfspath = cfs_config_path($vmid, $node);
my $conf = PVE::Cluster::cfs_read_file($cfspath);
- die "container $vmid does not exists\n" if !defined($conf);
+ die "container $vmid does not exist\n" if !defined($conf);
return $conf;
}
}
# flock: we use one file handle per process, so lock file
-# can be called multiple times and succeeds for the same process.
+# can be called multiple times and will succeed for the same process.
my $lock_handles = {};
my $lockdir = "/run/lock/lxc";
return "$lockdir/pve-config-${vmid}.lock";
}
-sub lock_aquire {
- my ($vmid, $timeout) = @_;
+sub lock_container {
+ my ($vmid, $timeout, $code, @param) = @_;
$timeout = 10 if !$timeout;
- my $mode = LOCK_EX;
my $filename = lock_filename($vmid);
mkdir $lockdir if !-d $lockdir;
- my $lock_func = sub {
- if (!$lock_handles->{$$}->{$filename}) {
- my $fh = new IO::File(">>$filename") ||
- die "can't open file - $!\n";
- $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
- }
-
- if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode |LOCK_NB)) {
- print STDERR "trying to aquire lock...";
- my $success;
- while(1) {
- $success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode);
- # try again on EINTR (see bug #273)
- if ($success || ($! != EINTR)) {
- last;
- }
- }
- if (!$success) {
- print STDERR " failed\n";
- die "can't aquire lock - $!\n";
- }
-
- print STDERR " OK\n";
- }
-
- $lock_handles->{$$}->{$filename}->{refcount}++;
- };
-
- eval { PVE::Tools::run_with_timeout($timeout, $lock_func); };
- my $err = $@;
- if ($err) {
- die "can't lock file '$filename' - $err";
- }
-}
-
-sub lock_release {
- my ($vmid) = @_;
-
- my $filename = lock_filename($vmid);
-
- if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) {
- my $refcount = --$lock_handles->{$$}->{$filename}->{refcount};
- if ($refcount <= 0) {
- $lock_handles->{$$}->{$filename} = undef;
- close ($fh);
- }
- }
-}
-
-sub lock_container {
- my ($vmid, $timeout, $code, @param) = @_;
-
- my $res;
+ my $res = PVE::Tools::lock_file_full($filename, $timeout, 0, $code, @param);
- lock_aquire($vmid, $timeout);
- eval { $res = &$code(@param) };
- my $err = $@;
- lock_release($vmid);
-
- die $err if $err;
+ die $@ if $@;
return $res;
}
}
sub get_container_disk_usage {
+ my ($vmid, $pid) = @_;
+
+ return PVE::Tools::df("/proc/$pid/root/", 1);
+}
+
+my $last_proc_vmid_stat;
+
+my $parse_cpuacct_stat = sub {
my ($vmid) = @_;
- my $cmd = ['lxc-attach', '-n', $vmid, '--', 'df', '-P', '-B', '1', '/'];
+ my $raw = read_cgroup_value('cpuacct', $vmid, 'cpuacct.stat', 1);
- my $res = {
- total => 0,
- used => 0,
- avail => 0,
- };
+ my $stat = {};
- my $parser = sub {
- my $line = shift;
- if (my ($fsid, $total, $used, $avail) = $line =~
- m/^(\S+.*)\s+(\d+)\s+(\d+)\s+(\d+)\s+\d+%\s.*$/) {
- $res = {
- total => $total,
- used => $used,
- avail => $avail,
- };
- }
- };
- eval { PVE::Tools::run_command($cmd, timeout => 1, outfunc => $parser); };
- warn $@ if $@;
+ if ($raw =~ m/^user (\d+)\nsystem (\d+)\n/) {
- return $res;
-}
+ $stat->{utime} = $1;
+ $stat->{stime} = $2;
+
+ }
+
+ return $stat;
+};
sub vmstatus {
my ($opt_vmid) = @_;
my $active_hash = list_active_containers();
+ my $cpucount = $cpuinfo->{cpus} || 1;
+
+ my $cdtime = gettimeofday;
+
+ my $uptime = (PVE::ProcFSTools::read_proc_uptime(1))[0];
+
foreach my $vmid (keys %$list) {
my $d = $list->{$vmid};
- my $running = defined($active_hash->{$vmid});
+ eval { $d->{pid} = find_lxc_pid($vmid) if defined($active_hash->{$vmid}); };
+ warn $@ if $@; # ignore errors (consider them stopped)
- $d->{status} = $running ? 'running' : 'stopped';
+ $d->{status} = $d->{pid} ? 'running' : 'stopped';
my $cfspath = cfs_config_path($vmid);
my $conf = PVE::Cluster::cfs_read_file($cfspath) || {};
$d->{name} = $conf->{'hostname'} || "CT$vmid";
$d->{name} =~ s/[\s]//g;
- $d->{cpus} = $conf->{cpulimit} // 0;
+ $d->{cpus} = $conf->{cpulimit} || $cpucount;
- if ($running) {
- my $res = get_container_disk_usage($vmid);
+ if ($d->{pid}) {
+ my $res = get_container_disk_usage($vmid, $d->{pid});
$d->{disk} = $res->{used};
$d->{maxdisk} = $res->{total};
} else {
$d->{disk} = 0;
# use 4GB by default ??
if (my $rootfs = $conf->{rootfs}) {
- my $rootinfo = parse_ct_mountpoint($rootfs);
+ my $rootinfo = parse_ct_rootfs($rootfs);
$d->{maxdisk} = int(($rootinfo->{size} || 4)*1024*1024)*1024;
} else {
$d->{maxdisk} = 4*1024*1024*1024;
foreach my $vmid (keys %$list) {
my $d = $list->{$vmid};
- next if $d->{status} ne 'running';
+ my $pid = $d->{pid};
+
+ next if !$pid; # skip stopped CTs
- my $pid = find_lxc_pid($vmid);
my $ctime = (stat("/proc/$pid"))[10]; # 10 = ctime
$d->{uptime} = time - $ctime; # the method lxcfs uses
$d->{diskwrite} = $2 if $key eq 'Write';
}
}
+
+ my $pstat = &$parse_cpuacct_stat($vmid);
+
+ my $used = $pstat->{utime} + $pstat->{stime};
+
+ my $old = $last_proc_vmid_stat->{$vmid};
+ if (!$old) {
+ $last_proc_vmid_stat->{$vmid} = {
+ time => $cdtime,
+ used => $used,
+ cpu => 0,
+ };
+ next;
+ }
+
+ my $dtime = ($cdtime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};
+
+ if ($dtime > 1000) {
+ my $dutime = $used - $old->{used};
+
+ $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
+ $last_proc_vmid_stat->{$vmid} = {
+ time => $cdtime,
+ used => $used,
+ cpu => $d->{cpu},
+ };
+ } else {
+ $d->{cpu} = $old->{cpu};
+ }
+ }
+
+ my $netdev = PVE::ProcFSTools::read_proc_net_dev();
+
+ foreach my $dev (keys %$netdev) {
+ next if $dev !~ m/^veth([1-9]\d*)i/;
+ my $vmid = $1;
+ my $d = $list->{$vmid};
+
+ next if !$d;
+
+ $d->{netout} += $netdev->{$dev}->{receive};
+ $d->{netin} += $netdev->{$dev}->{transmit};
+
}
return $list;
}
-sub parse_ct_mountpoint {
- my ($data, $noerr) = @_;
+# Classify a mountpoint's volume string purely by its syntax:
+#   'device' - absolute path below /dev/
+#   'bind'   - any other absolute path
+#   'volume' - anything that does not start with a slash
+sub classify_mountpoint {
+    my ($vol) = @_;
+
+    return 'volume' if $vol !~ m!^/!;
+
+    return ($vol =~ m!^/dev/!) ? 'device' : 'bind';
+}
+
+my $parse_ct_mountpoint_full = sub {
+ my ($desc, $data, $noerr) = @_;
$data //= '';
my $res;
- eval { $res = PVE::JSONSchema::parse_property_string($mp_desc, $data) };
+ eval { $res = PVE::JSONSchema::parse_property_string($desc, $data) };
if ($@) {
return undef if $noerr;
die $@;
}
- if (!defined($res->{volume})) {
- return undef if $noerr;
- die "no volume set on mountpoint\n";
- }
-
- if (my $size = $res->{size}) {
+ if (defined(my $size = $res->{size})) {
$size = PVE::JSONSchema::parse_size($size);
if (!defined($size)) {
return undef if $noerr;
$res->{size} = $size;
}
+ $res->{type} = classify_mountpoint($res->{volume});
+
return $res;
+};
+
+# Parse a 'rootfs' property string against $rootfs_desc.
+#
+# Delegates to $parse_ct_mountpoint_full and, when that yields a result,
+# sets its 'mp' entry to '/'. An undefined result is passed through
+# unchanged.
+sub parse_ct_rootfs {
+    my ($data, $noerr) = @_;
+
+    my $rootfs = $parse_ct_mountpoint_full->($rootfs_desc, $data, $noerr);
+    return $rootfs if !defined($rootfs);
+
+    $rootfs->{mp} = '/';
+
+    return $rootfs;
+}
+
+sub parse_ct_mountpoint {
+ my ($data, $noerr) = @_;
+
+ return &$parse_ct_mountpoint_full($mp_desc, $data, $noerr);
}
sub print_ct_mountpoint {
my ($info, $nomp) = @_;
- my $skip = $nomp ? ['mp'] : [];
+ my $skip = [ 'type' ];
+ push @$skip, 'mp' if $nomp;
return PVE::JSONSchema::print_property_string($info, $mp_desc, $skip);
}
my @args = split(/\0/, $cmdline);
- # serach for lxc-console -n <vmid>
+ # search for lxc-console -n <vmid>
return if scalar(@args) != 3;
return if $args[1] ne '-n';
return if $args[2] !~ m/^\d+$/;
return $pid;
}
-my $ipv4_reverse_mask = [
- '0.0.0.0',
- '128.0.0.0',
- '192.0.0.0',
- '224.0.0.0',
- '240.0.0.0',
- '248.0.0.0',
- '252.0.0.0',
- '254.0.0.0',
- '255.0.0.0',
- '255.128.0.0',
- '255.192.0.0',
- '255.224.0.0',
- '255.240.0.0',
- '255.248.0.0',
- '255.252.0.0',
- '255.254.0.0',
- '255.255.0.0',
- '255.255.128.0',
- '255.255.192.0',
- '255.255.224.0',
- '255.255.240.0',
- '255.255.248.0',
- '255.255.252.0',
- '255.255.254.0',
- '255.255.255.0',
- '255.255.255.128',
- '255.255.255.192',
- '255.255.255.224',
- '255.255.255.240',
- '255.255.255.248',
- '255.255.255.252',
- '255.255.255.254',
- '255.255.255.255',
-];
-
# Note: we cannot use Net:IP, because that only allows strict
# CIDR networks
sub parse_ipv4_cidr {
my ($cidr, $noerr) = @_;
- if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$! && ($2 > 7) && ($2 < 32)) {
- return { address => $1, netmask => $ipv4_reverse_mask->[$2] };
+ if ($cidr =~ m!^($IPV4RE)(?:/(\d+))$! && ($2 > 7) && ($2 <= 32)) {
+ return { address => $1, netmask => $PVE::Network::ipv4_reverse_mask->[$2] };
}
return undef if $noerr;
die "missing 'arch' - internal error" if !$conf->{arch};
$raw .= "lxc.arch = $conf->{arch}\n";
+ my $unprivileged = $conf->{unprivileged};
+ my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};
+
my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
- if ($ostype =~ /^(?:debian | ubuntu | centos | archlinux)$/x) {
+ if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
$raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
+ if ($unprivileged || $custom_idmap) {
+ $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
+ }
} else {
- die "implement me";
+ die "implement me (ostype $ostype)";
+ }
+
+ $raw .= "lxc.monitor.unshare = 1\n";
+
+ # Should we read them from /etc/subuid?
+ if ($unprivileged && !$custom_idmap) {
+ $raw .= "lxc.id_map = u 0 100000 65536\n";
+ $raw .= "lxc.id_map = g 0 100000 65536\n";
}
if (!has_dev_console($conf)) {
my $ttycount = get_tty_count($conf);
$raw .= "lxc.tty = $ttycount\n";
- # some init scripts expects a linux terminal (turnkey).
+ # some init scripts expect a linux terminal (turnkey).
$raw .= "lxc.environment = TERM=linux\n";
my $utsname = $conf->{hostname} || "CT$vmid";
my $shares = $conf->{cpuunits} || 1024;
$raw .= "lxc.cgroup.cpu.shares = $shares\n";
- my $mountpoint = parse_ct_mountpoint($conf->{rootfs});
- $mountpoint->{mp} = '/';
-
- my ($path, $use_loopdev) = mountpoint_mount_path($mountpoint, $storage_cfg);
- $path = "loop:$path" if $use_loopdev;
+ my $mountpoint = parse_ct_rootfs($conf->{rootfs});
- $raw .= "lxc.rootfs = $path\n";
+ $raw .= "lxc.rootfs = $dir/rootfs\n";
my $netcount = 0;
foreach my $k (keys %$conf) {
sub add_unused_volume {
my ($config, $volid) = @_;
- # skip bind mounts and block devices
- return if $volid =~ m|^/|;
-
my $key;
for (my $ind = $MAX_UNUSED_DISKS - 1; $ind >= 0; $ind--) {
my $test = "unused$ind";
}
}
- die "To many unused volume - please delete them first.\n" if !$key;
+ die "Too many unused volumes - please delete them first.\n" if !$key;
$config->{$key} = $volid;
if (defined($delete)) {
foreach my $opt (@$delete) {
+ if (!exists($conf->{$opt})) {
+ warn "no such option: $opt\n";
+ next;
+ }
+
if ($opt eq 'hostname' || $opt eq 'memory' || $opt eq 'rootfs') {
die "unable to delete required option '$opt'\n";
} elsif ($opt eq 'swap') {
next if $hotplug_error->($opt);
check_protection($conf, "can't remove CT $vmid drive '$opt'");
my $mountpoint = parse_ct_mountpoint($conf->{$opt});
- add_unused_volume($conf, $mountpoint->{volume});
+ if ($mountpoint->{type} eq 'volume') {
+ add_unused_volume($conf, $mountpoint->{volume})
+ }
delete $conf->{$opt};
+ } elsif ($opt eq 'unprivileged') {
+ die "unable to delete read-only option: '$opt'\n";
} else {
- die "implement me"
+ die "implement me (delete: $opt)"
}
write_config($vmid, $conf) if $running;
}
my $wanted_swap = PVE::Tools::extract_param($param, 'swap');
if (defined($wanted_memory) || defined($wanted_swap)) {
- $wanted_memory //= ($conf->{memory} || 512);
- $wanted_swap //= ($conf->{swap} || 0);
+ my $old_memory = ($conf->{memory} || 512);
+ my $old_swap = ($conf->{swap} || 0);
+
+ $wanted_memory //= $old_memory;
+ $wanted_swap //= $old_swap;
my $total = $wanted_memory + $wanted_swap;
if ($running) {
- write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
- write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
+ my $old_total = $old_memory + $old_swap;
+ if ($total > $old_total) {
+ write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
+ write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
+ } else {
+ write_cgroup_value("memory", $vmid, "memory.limit_in_bytes", int($wanted_memory*1024*1024));
+ write_cgroup_value("memory", $vmid, "memory.memsw.limit_in_bytes", int($total*1024*1024));
+ }
}
$conf->{memory} = $wanted_memory;
$conf->{swap} = $wanted_swap;
} elsif ($opt eq 'rootfs') {
check_protection($conf, "can't update CT $vmid drive '$opt'");
die "implement me: $opt";
+ } elsif ($opt eq 'unprivileged') {
+ die "unable to modify read-only option: '$opt'\n";
} else {
die "implement me: $opt";
}
}
my $ipv6 = $net->{ip6};
if ($ipv6) {
- if ($ipv6 =~ /^(dhcp|manual)$/) {
+ if ($ipv6 =~ /^(auto|dhcp|manual)$/) {
$ipv6 = undef;
} else {
$ipv6 =~ s!/\d+$!!;
sub delete_mountpoint_volume {
my ($storage_cfg, $vmid, $volume) = @_;
- # skip bind mounts and block devices
- if ($volume =~ m|^/|) {
- return;
- }
+ return if classify_mountpoint($volume) ne 'volume';
my ($vtype, $name, $owner) = PVE::Storage::parse_volname($storage_cfg, $volume);
PVE::Storage::vdisk_free($storage_cfg, $volume) if $vmid == $owner;
write_config($vmid, $conf);
}
- PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall});
+ PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
foreach (qw(bridge tag firewall)) {
$oldnet->{$_} = $newnet->{$_} if $newnet->{$_};
}
my $eth = $newnet->{name};
PVE::Network::veth_create($veth, $vethpeer, $newnet->{bridge}, $newnet->{hwaddr});
- PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall});
+ PVE::Network::tap_plug($veth, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks});
# attach peer in container
my $cmd = ['lxc-device', '-n', $vmid, 'add', $vethpeer, "$eth" ];
return if !$change_ip && !$change_gw;
# step 1: add new IP, if this fails we cancel
- if ($change_ip && $newip && $newip !~ /^(?:auto|dhcp)$/) {
+ my $is_real_ip = ($newip && $newip !~ /^(?:auto|dhcp|manual)$/);
+ if ($change_ip && $is_real_ip) {
eval { &$ipcmd($family_opt, 'addr', 'add', $newip, 'dev', $eth); };
if (my $err = $@) {
warn $err;
# Note: 'ip route replace' can add
if ($change_gw) {
if ($newgw) {
- eval { &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw); };
+ eval {
+ if ($is_real_ip && !PVE::Network::is_ip_in_cidr($newgw, $newip, $ipversion)) {
+ &$ipcmd($family_opt, 'route', 'add', $newgw, 'dev', $eth);
+ }
+ &$ipcmd($family_opt, 'route', 'replace', 'default', 'via', $newgw);
+ };
if (my $err = $@) {
warn $err;
# the route was not replaced, the old IP is still available
# Internal snapshots
# NOTE: Snapshot create/delete involves several non-atomic
-# action, and can take a long time.
-# So we try to avoid locking the file and use 'lock' variable
+# actions, and can take a long time.
+# So we try to avoid locking the file and use the 'lock' variable
# inside the config file instead.
my $snapshot_copy_config = sub {
if defined($conf->{snapshots}->{$snapname});
my $storecfg = PVE::Storage::config();
- die "snapshot feature is not available\n" if !has_feature('snapshot', $conf, $storecfg);
+ my $feature = $snapname eq 'vzdump' ? 'vzdump' : 'snapshot';
+ die "snapshot feature is not available\n" if !has_feature($feature, $conf, $storecfg);
$snap = $conf->{snapshots}->{$snapname} = {};
my ($feature, $conf, $storecfg, $snapname) = @_;
my $err;
+ my $vzdump = $feature eq 'vzdump';
+ $feature = 'snapshot' if $vzdump;
foreach_mountpoint($conf, sub {
my ($ms, $mountpoint) = @_;
return if $err; # skip further test
+ return if $vzdump && $ms ne 'rootfs' && !$mountpoint->{backup};
$err = 1 if !PVE::Storage::volume_has_feature($storecfg, $feature, $mountpoint->{volume}, $snapname);
my $conf = load_config($vmid);
my $running = check_running($vmid);
+
+ my $unfreeze = 0;
+
eval {
if ($running) {
PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
+ $unfreeze = 1;
PVE::Tools::run_command(['/bin/sync']);
};
my $storecfg = PVE::Storage::config();
- my $rootinfo = parse_ct_mountpoint($conf->{rootfs});
+ my $rootinfo = parse_ct_rootfs($conf->{rootfs});
my $volid = $rootinfo->{volume};
- if ($running) {
- PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]);
- };
-
PVE::Storage::volume_snapshot($storecfg, $volid, $snapname);
&$snapshot_commit($vmid, $snapname);
};
- if(my $err = $@) {
+ my $err = $@;
+
+ if ($unfreeze) {
+ eval { PVE::Tools::run_command(['/usr/bin/lxc-unfreeze', '-n', $vmid]); };
+ warn $@ if $@;
+ }
+
+ if ($err) {
snapshot_delete($vmid, $snapname, 1);
die "$err\n";
}
my $storecfg = PVE::Storage::config();
- my $del_snap = sub {
+ my $unlink_parent = sub {
- check_lock($conf);
+ my ($confref, $new_parent) = @_;
- if ($conf->{parent} eq $snapname) {
- if ($conf->{snapshots}->{$snapname}->{snapname}) {
- $conf->{parent} = $conf->{snapshots}->{$snapname}->{parent};
+ if ($confref->{parent} && $confref->{parent} eq $snapname) {
+ if ($new_parent) {
+ $confref->{parent} = $new_parent;
} else {
- delete $conf->{parent};
+ delete $confref->{parent};
}
}
+ };
+
+ my $del_snap = sub {
+
+ check_lock($conf);
+
+ my $parent = $conf->{snapshots}->{$snapname}->{parent};
+ foreach my $snapkey (keys %{$conf->{snapshots}}) {
+ &$unlink_parent($conf->{snapshots}->{$snapkey}, $parent);
+ }
+
+ &$unlink_parent($conf, $parent);
delete $conf->{snapshots}->{$snapname};
};
my $rootfs = $conf->{snapshots}->{$snapname}->{rootfs};
- my $rootinfo = parse_ct_mountpoint($rootfs);
+ my $rootinfo = parse_ct_rootfs($rootfs);
my $volid = $rootinfo->{volume};
eval {
die "snapshot '$snapname' does not exist\n" if !defined($snap);
my $rootfs = $snap->{rootfs};
- my $rootinfo = parse_ct_mountpoint($rootfs);
+ my $rootinfo = parse_ct_rootfs($rootfs);
my $volid = $rootinfo->{volume};
PVE::Storage::volume_rollback_is_possible($storecfg, $volid, $snapname);
my $storecfg = PVE::Storage::config();
- my $rootinfo = parse_ct_mountpoint($conf->{rootfs});
+ my $rootinfo = parse_ct_rootfs($conf->{rootfs});
my $volid = $rootinfo->{volume};
die "Template feature is not available for '$volid'\n"
return $reverse ? reverse @names : @names;
}
-# The container might have *different* symlinks than the host. realpath/abs_path
-# use the actual filesystem to resolve links.
-sub sanitize_mountpoint {
- my ($mp) = @_;
- $mp = '/' . $mp; # we always start with a slash
- $mp =~ s@/{2,}@/@g; # collapse sequences of slashes
- $mp =~ s@/\./@@g; # collapse /./
- $mp =~ s@/\.(/)?$@$1@; # collapse a trailing /. or /./
- $mp =~ s@(.*)/[^/]+/\.\./@$1/@g; # collapse /../ without regard for symlinks
- $mp =~ s@/\.\.(/)?$@$1@; # collapse trailing /.. or /../ disregarding symlinks
- return $mp;
-}
sub foreach_mountpoint_full {
my ($conf, $reverse, $func) = @_;
foreach my $key (mountpoint_names($reverse)) {
my $value = $conf->{$key};
next if !defined($value);
- my $mountpoint = parse_ct_mountpoint($value, 1);
+ my $mountpoint = $key eq 'rootfs' ? parse_ct_rootfs($value, 1) : parse_ct_mountpoint($value, 1);
next if !defined($mountpoint);
- # just to be sure: rootfs is /
- my $path = $key eq 'rootfs' ? '/' : $mountpoint->{mp};
- $mountpoint->{mp} = sanitize_mountpoint($path);
-
- $path = $mountpoint->{volume};
- $mountpoint->{volume} = sanitize_mountpoint($path) if $path =~ m|^/|;
-
&$func($key, $mountpoint);
}
}
foreach_mountpoint($conf, sub {
my ($ms, $mountpoint) = @_;
- my $volid = $mountpoint->{volume};
- my $mount = $mountpoint->{mp};
-
- return if !$volid || !$mount;
-
- my $image_path = PVE::Storage::path($storage_cfg, $volid);
- my ($vtype, undef, undef, undef, undef, $isBase, $format) =
- PVE::Storage::parse_volname($storage_cfg, $volid);
-
- die "unable to mount base volume - internal error" if $isBase;
-
mountpoint_mount($mountpoint, $rootdir, $storage_cfg);
});
};
if (my $err = $@) {
- warn "mounting container failed - $err";
+ warn "mounting container failed\n";
umount_all($vmid, $storage_cfg, $conf, 1);
+ die $err;
}
return $rootdir;
}
};
+# Run `losetup --associated $path` and return the /dev/loopN name found at
+# the start of an output line, or undef when no line matches. If several
+# lines match, the last one wins.
+sub query_loopdev {
+    my ($path) = @_;
+
+    my $loopdev;
+
+    PVE::Tools::run_command(
+        ['losetup', '--associated', $path],
+        outfunc => sub {
+            my ($line) = @_;
+            $loopdev = $1 if $line =~ m@^(/dev/loop\d+):@;
+        },
+    );
+
+    return $loopdev;
+}
+
# use $rootdir = undef to just return the corresponding mount path
sub mountpoint_mount {
my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;
my $volid = $mountpoint->{volume};
my $mount = $mountpoint->{mp};
+ my $type = $mountpoint->{type};
return if !$volid || !$mount;
die "unknown snapshot path for '$volid'" if !$storage && defined($snapname);
+ my $optstring = '';
+ if (defined($mountpoint->{acl})) {
+ $optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
+ }
+ if ($mountpoint->{ro}) {
+ $optstring .= ',' if $optstring;
+ $optstring .= 'ro';
+ }
+
+ my @extra_opts = ('-o', $optstring);
+
if ($storage) {
my $scfg = PVE::Storage::storage_config($storage_cfg, $storage);
my ($vtype, undef, undef, undef, undef, $isBase, $format) =
PVE::Storage::parse_volname($storage_cfg, $volid);
+ $format = 'iso' if $vtype eq 'iso'; # allow to handle iso files
+
if ($format eq 'subvol') {
if ($mount_path) {
if ($snapname) {
if ($scfg->{type} eq 'zfspool') {
my $path_arg = $path;
$path_arg =~ s!^/+!!;
- PVE::Tools::run_command(['mount', '-o', 'ro', '-t', 'zfs', $path_arg, $mount_path]);
+ PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, '-t', 'zfs', $path_arg, $mount_path]);
} else {
die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
}
} else {
- PVE::Tools::run_command(['mount', '-o', 'bind', $path, $mount_path]);
+ if ($mountpoint->{ro}) {
+ die "read-only bind mounts not supported\n";
+ }
+ PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
}
}
return wantarray ? ($path, 0) : $path;
- } elsif ($format eq 'raw') {
+ } elsif ($format eq 'raw' || $format eq 'iso') {
my $use_loopdev = 0;
- my @extra_opts;
if ($scfg->{path}) {
push @extra_opts, '-o', 'loop';
$use_loopdev = 1;
- } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'rbd') {
+ } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
+ $scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
# do nothing
} else {
die "unsupported storage type '$scfg->{type}'\n";
}
if ($mount_path) {
- if ($isBase || defined($snapname)) {
+ if ($format eq 'iso') {
+ PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
+ } elsif ($isBase || defined($snapname)) {
PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
} else {
PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
} else {
die "unsupported image format '$format'\n";
}
- } elsif ($volid =~ m|^/dev/.+|) {
- PVE::Tools::run_command(['mount', $volid, $mount_path]) if $mount_path;
+ } elsif ($type eq 'device') {
+ PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
return wantarray ? ($volid, 0) : $volid;
- } elsif ($volid !~ m|^/dev/.+| && $volid =~ m|^/.+| && -d $volid) {
+ } elsif ($type eq 'bind') {
+ if ($mountpoint->{ro}) {
+ die "read-only bind mounts not supported\n";
+ # Theoretically we'd have to execute both:
+ # mount -o bind $a $b
+ # mount -o bind,remount,ro $a $b
+ }
+ die "directory '$volid' does not exist\n" if ! -d $volid;
&$check_mount_path($volid);
- PVE::Tools::run_command(['mount', '-o', 'bind', $volid, $mount_path]) if $mount_path;
+ PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
return wantarray ? ($volid, 0) : $volid;
}
my $volid = $mountpoint->{volume};
- return if !$volid || $volid =~ m|^/|;
+ return if !$volid || $mountpoint->{type} ne 'volume';
my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
return if !$sid;
}
+# Create an ext4 filesystem with the 'mmp' feature on $dev and set the
+# owner of its root directory to $rootuid:$rootgid.
+#
+# $rootuid and $rootgid are optional and default to 0.
sub mkfs {
- my ($dev) = @_;
+    my ($dev, $rootuid, $rootgid) = @_;
+
+    # format_disk() calls mkfs($volid) without ids for /dev/ paths. Without
+    # these defaults the undefined values would interpolate into a malformed
+    # "root_owner=:" option and trigger 'uninitialized value' warnings.
+    $rootuid //= 0;
+    $rootgid //= 0;
- PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp', $dev]);
+    PVE::Tools::run_command(['mkfs.ext4', '-O', 'mmp',
+                             '-E', "root_owner=$rootuid:$rootgid",
+                             $dev]);
}
sub format_disk {
- my ($storage_cfg, $volid) = @_;
+ my ($storage_cfg, $volid, $rootuid, $rootgid) = @_;
if ($volid =~ m!^/dev/.+!) {
mkfs($volid);
die "cannot format volume '$volid' (format == $format)\n"
if $format ne 'raw';
- mkfs($path);
+ mkfs($path, $rootuid, $rootgid);
}
sub destroy_disks {
my $vollist = [];
eval {
+ my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+ my $chown_vollist = [];
+
foreach_mountpoint($settings, sub {
my ($ms, $mountpoint) = @_;
my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);
- return if !$storage;
-
- if ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/) {
+ if ($storage && ($volid =~ m/^([^:\s]+):(\d+(\.\d+)?)$/)) {
my ($storeid, $size_gb) = ($1, $2);
my $size_kb = int(${size_gb}*1024) * 1024;
if ($size_kb > 0) {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw',
undef, $size_kb);
- format_disk($storecfg, $volid);
+ format_disk($storecfg, $volid, $rootuid, $rootgid);
} else {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'subvol',
undef, 0);
+ push @$chown_vollist, $volid;
}
} elsif ($scfg->{type} eq 'zfspool') {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'subvol',
undef, $size_kb);
- } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm') {
+ push @$chown_vollist, $volid;
+ } elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' || $scfg->{type} eq 'lvmthin') {
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw', undef, $size_kb);
- format_disk($storecfg, $volid);
+ format_disk($storecfg, $volid, $rootuid, $rootgid);
} elsif ($scfg->{type} eq 'rbd') {
die "krbd option must be enabled on storage type '$scfg->{type}'\n" if !$scfg->{krbd};
$volid = PVE::Storage::vdisk_alloc($storecfg, $storage, $vmid, 'raw', undef, $size_kb);
- format_disk($storecfg, $volid);
+ format_disk($storecfg, $volid, $rootuid, $rootgid);
} else {
die "unable to create containers on storage type '$scfg->{type}'\n";
}
push @$vollist, $volid;
- my $new_mountpoint = { volume => $volid, size => $size_kb*1024, mp => $mp };
- $conf->{$ms} = print_ct_mountpoint($new_mountpoint, $ms eq 'rootfs');
+ $mountpoint->{volume} = $volid;
+ $mountpoint->{size} = $size_kb * 1024;
+ $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
} else {
- # use specified/existing volid
+ # use specified/existing volid/dir/device
+ $conf->{$ms} = print_ct_mountpoint($mountpoint, $ms eq 'rootfs');
}
});
+
+ PVE::Storage::activate_volumes($storecfg, $chown_vollist, undef);
+ foreach my $volid (@$chown_vollist) {
+ my $path = PVE::Storage::path($storecfg, $volid, undef);
+ chown($rootuid, $rootgid, $path);
+ }
+ PVE::Storage::deactivate_volumes($storecfg, $chown_vollist, undef);
};
# free allocated images on error
if (my $err = $@) {
return &$complete_ctid_full(1);
}
+# Collect the 'lxc.id_map' entries of a container config.
+#
+# Each entry value must look like "<u|g> <ct-id> <host-id> <length>";
+# anything else makes the function die. For entries whose container id
+# is 0, the host id is remembered as the root uid (type 'u') or root gid
+# (type 'g').
+#
+# When no entries exist and $conf->{unprivileged} is set, a default
+# mapping of 65536 ids starting at host id 100000 is used for both types.
+#
+# Returns the list ($id_map, $rootuid, $rootgid), where $id_map is an
+# array ref of [type, ct-id, host-id, length] array refs.
+sub parse_id_maps {
+    my ($conf) = @_;
+
+    my @maps;
+    my %root_id = (u => 0, g => 0);
+
+    # work on a local reference so a missing 'lxc' key is not autovivified
+    my $entries = $conf->{lxc} // [];
+
+    foreach my $entry (@$entries) {
+        my ($key, $value) = @$entry;
+        next unless $key eq 'lxc.id_map';
+
+        if ($value !~ /^([ug])\s+(\d+)\s+(\d+)\s+(\d+)\s*$/) {
+            die "failed to parse id_map: $value\n";
+        }
+        my ($type, $ct, $host, $length) = ($1, $2, $3, $4);
+
+        push @maps, [$type, $ct, $host, $length];
+        $root_id{$type} = $host if $ct == 0;
+    }
+
+    if (!@maps && $conf->{unprivileged}) {
+        # Should we read them from /etc/subuid?
+        @maps = (
+            ['u', '0', '100000', '65536'],
+            ['g', '0', '100000', '65536'],
+        );
+        $root_id{u} = $root_id{g} = 100000;
+    }
+
+    return (\@maps, $root_id{u}, $root_id{g});
+}
+
+# Build the 'lxc-usernsexec' command prefix for the given id map.
+#
+# $id_map is an array ref of array refs; each inner array is joined with
+# ':' and passed as one '-m' argument. The prefix ends with '--'.
+# Returns an empty array ref when $id_map has no entries.
+sub userns_command {
+    my ($id_map) = @_;
+
+    return [] if !@$id_map;
+
+    my @cmd = ('lxc-usernsexec');
+    foreach my $mapping (@$id_map) {
+        push @cmd, '-m', join(':', @$mapping);
+    }
+    push @cmd, '--';
+
+    return \@cmd;
+}
+
1;