use warnings;
use POSIX qw(EINTR);
+use Socket;
+
use File::Path;
use File::Spec;
use Cwd qw();
-use Fcntl ':flock';
+use Fcntl qw(O_RDONLY);
use PVE::Cluster qw(cfs_register_file cfs_read_file);
use PVE::Storage;
use PVE::SafeSyslog;
use PVE::INotify;
use PVE::JSONSchema qw(get_standard_option);
-use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach);
+use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full);
use PVE::Network;
use PVE::AccessControl;
use PVE::ProcFSTools;
description => 'Read-only mountpoint (not supported with bind mounts)',
optional => 1,
},
+ quota => {
+ type => 'boolean',
+ format_description => '[0|1]',
+ description => 'Enable user quotas inside the container (not supported with zfs subvolumes)',
+ optional => 1,
+ },
};
PVE::JSONSchema::register_standard_option('pve-ct-rootfs', {
ostype => {
optional => 1,
type => 'string',
- enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux'],
+ enum => ['debian', 'ubuntu', 'centos', 'fedora', 'opensuse', 'archlinux', 'alpine'],
description => "OS type. Corresponds to lxc setup scripts in /usr/share/lxc/config/<ostype>.common.conf.",
},
console => {
my $lock_handles = {};
my $lockdir = "/run/lock/lxc";
+# Return the path of the lock file used for the configuration of container
+# $vmid; the file lives below $lockdir.
-sub lock_filename {
+sub config_file_lock {
my ($vmid) = @_;
return "$lockdir/pve-config-${vmid}.lock";
}
-sub lock_container {
+# Call lock_file() on the config lock file of container $vmid, handing
+# $timeout, $code and @param through unchanged. The lock directory is created
+# first if it does not exist. An error left in $@ by lock_file() is re-thrown,
+# otherwise the value returned by lock_file() is returned.
+# NOTE(review): no default is applied to $timeout here any more - confirm
+# that lock_file() copes with an undefined/zero timeout.
+sub lock_config_full {
my ($vmid, $timeout, $code, @param) = @_;
- $timeout = 10 if !$timeout;
+ my $filename = config_file_lock($vmid);
+
+ mkdir $lockdir if !-d $lockdir;
+
+ my $res = lock_file($filename, $timeout, $code, @param);
+
+ die $@ if $@;
+
+ return $res;
+}
- my $filename = lock_filename($vmid);
+# Same as lock_config_full(), but goes through lock_file_full() and passes
+# the extra $shared argument on to it.
+# presumably a true $shared requests a shared instead of an exclusive lock -
+# verify against PVE::Tools::lock_file_full.
+sub lock_config_mode {
+ my ($vmid, $timeout, $shared, $code, @param) = @_;
+
+ my $filename = config_file_lock($vmid);
mkdir $lockdir if !-d $lockdir;
- my $res = PVE::Tools::lock_file_full($filename, $timeout, 0, $code, @param);
+ my $res = lock_file_full($filename, $timeout, $shared, $code, @param);
die $@ if $@;
return $res;
}
+# Convenience wrapper: same as lock_config_full() with the timeout argument
+# fixed to 10.
+sub lock_config {
+    my ($vmid, $code, @param) = @_;
+
+    my $timeout = 10;
+
+    return lock_config_full($vmid, $timeout, $code, @param);
+}
+
sub option_exists {
my ($name) = @_;
return $prop;
}
-sub json_config_properties_no_rootfs {
- my $prop = shift;
-
- foreach my $opt (keys %$confdesc) {
- next if $prop->{$opt};
- next if $opt eq 'parent' || $opt eq 'snaptime' || $opt eq 'rootfs';
- $prop->{$opt} = $confdesc->{$opt};
- }
-
- return $prop;
-}
-
# container status helpers
sub list_active_containers {
my $custom_idmap = grep { $_->[0] eq 'lxc.id_map' } @{$conf->{lxc}};
my $ostype = $conf->{ostype} || die "missing 'ostype' - internal error";
- if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux)$/x) {
- $raw .= "lxc.include = /usr/share/lxc/config/$ostype.common.conf\n";
+ if ($ostype =~ /^(?:debian | ubuntu | centos | fedora | opensuse | archlinux | alpine)$/x) {
+ my $inc ="/usr/share/lxc/config/$ostype.common.conf";
+ $inc ="/usr/share/lxc/config/common.conf" if !-f $inc;
+ $raw .= "lxc.include = $inc\n";
if ($unprivileged || $custom_idmap) {
- $raw .= "lxc.include = /usr/share/lxc/config/$ostype.userns.conf\n"
+ $inc = "/usr/share/lxc/config/$ostype.userns.conf";
+ $inc = "/usr/share/lxc/config/userns.conf" if !-f $inc;
+ $raw .= "lxc.include = $inc\n"
}
} else {
die "implement me (ostype $ostype)";
}
+ # WARNING: DO NOT REMOVE this without making sure that loop device nodes
+ # cannot be exposed to the container with r/w access (cgroup perms).
+ # When this is enabled mounts will still remain in the monitor's namespace
+ # after the container unmounted them and thus will not detach from their
+ # files while the container is running!
$raw .= "lxc.monitor.unshare = 1\n";
# Should we read them from /etc/subuid?
return join(' ', @list);
}
+# Check whether $volid is referenced by any mountpoint that
+# foreach_mountpoint() visits in $config.
+# Returns 1 if a mountpoint of type 'volume' uses exactly this volume id,
+# 0 otherwise.
+sub is_volume_in_use {
+    my ($config, $volid) = @_;
+
+    my $found = 0;
+    my $check = sub {
+        my ($key, $mp) = @_;
+        # already decided - nothing left to do for the remaining entries
+        return if $found;
+        $found = 1 if $mp->{type} eq 'volume' && $mp->{volume} eq $volid;
+    };
+    foreach_mountpoint($config, $check);
+
+    return $found;
+}
+
sub add_unused_volume {
my ($config, $volid) = @_;
} elsif ($opt =~ m/^mp(\d+)$/) {
next if $hotplug_error->($opt);
check_protection($conf, "can't remove CT $vmid drive '$opt'");
- my $mountpoint = parse_ct_mountpoint($conf->{$opt});
- if ($mountpoint->{type} eq 'volume') {
- add_unused_volume($conf, $mountpoint->{volume})
- }
+ my $mp = parse_ct_mountpoint($conf->{$opt});
delete $conf->{$opt};
+ if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
+ add_unused_volume($conf, $mp->{volume});
+ }
} elsif ($opt eq 'unprivileged') {
die "unable to delete read-only option: '$opt'\n";
} else {
} elsif ($opt =~ m/^mp(\d+)$/) {
next if $hotplug_error->($opt);
check_protection($conf, "can't update CT $vmid drive '$opt'");
+ my $old = $conf->{$opt};
$conf->{$opt} = $value;
+ if (defined($old)) {
+ my $mp = parse_ct_mountpoint($old);
+ if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
+ add_unused_volume($conf, $mp->{volume});
+ }
+ }
$new_disks = 1;
} elsif ($opt eq 'rootfs') {
+ next if $hotplug_error->($opt);
check_protection($conf, "can't update CT $vmid drive '$opt'");
- die "implement me: $opt";
+ my $old = $conf->{$opt};
+ $conf->{$opt} = $value;
+ if (defined($old)) {
+ my $mp = parse_ct_rootfs($old);
+ if ($mp->{type} eq 'volume' && !is_volume_in_use($conf, $mp->{volume})) {
+ add_unused_volume($conf, $mp->{volume});
+ }
+ }
} elsif ($opt eq 'unprivileged') {
die "unable to modify read-only option: '$opt'\n";
} else {
write_config($vmid, $conf);
};
- lock_container($vmid, 10, $updatefn);
+ lock_config($vmid, $updatefn);
return $snap;
};
write_config($vmid, $conf);
};
- lock_container($vmid, 10 ,$updatefn);
+ lock_config($vmid ,$updatefn);
};
sub has_feature {
return $err ? 0 : 1;
}
+# Enter the namespace /proc/$pid/ns/$which through PVE::Tools::setns();
+# $type is handed to setns as its second argument. $vmid is only used in
+# error messages. Dies if the namespace file cannot be opened or setns
+# reports failure.
+my $enter_namespace = sub {
+    my ($vmid, $pid, $which, $type) = @_;
+
+    my $ns_path = "/proc/$pid/ns/$which";
+    sysopen(my $ns_fh, $ns_path, O_RDONLY)
+        or die "failed to open $which namespace of container $vmid: $!\n";
+
+    my $entered = PVE::Tools::setns(fileno($ns_fh), $type);
+    die "failed to enter $which namespace of container $vmid: $!\n" if !$entered;
+
+    close $ns_fh;
+};
+
+# Child-side half of sync_container_namespace(): enter the mount namespace of
+# process $pid, announce that over $socket, read the mount table the peer
+# sends back and sync every listed mountpoint except 'fuse.lxcfs' ones.
+# Failures of individual syncs are only warned about.
+my $do_syncfs = sub {
+    my ($vmid, $pid, $socket) = @_;
+
+    $enter_namespace->($vmid, $pid, 'mnt', PVE::Tools::CLONE_NEWNS);
+
+    # Let the parent know it may start reading our /proc/mounts now
+    print {$socket} "go\n";
+    $socket->flush();
+
+    # The parent answers with the content of /proc/self/mounts; slurp it
+    my $mountdata;
+    {
+        local $/ = undef;
+        $mountdata = <$socket>;
+    }
+    close $socket;
+
+    # Finally sync each mountpoint on its own
+    for my $entry (@{ PVE::ProcFSTools::parse_mounts($mountdata) }) {
+        my (undef, $dir, $fstype) = @$entry;
+        next if $fstype eq 'fuse.lxcfs';
+        eval { PVE::Tools::sync_mountpoint($dir); };
+        warn $@ if $@;
+    }
+};
+
+# Sync the filesystems mounted inside the mount namespace of container $vmid.
+#
+# A forked child enters the container's mount namespace and reports "go" over
+# a socketpair; the parent then reads /proc/<child>/mounts, sends the content
+# back, and the child syncs the listed mountpoints.
+#
+# Dies if the socketpair or fork fails, if the child could not enter the
+# namespace, if the mount table cannot be read, or if the child exits with a
+# non-zero status. Once forked, the child is reaped on every path.
+sub sync_container_namespace {
+    my ($vmid) = @_;
+    my $pid = find_lxc_pid($vmid);
+
+    # SOCK_DGRAM is nicer for barriers but cannot be slurped
+    socketpair my $pfd, my $cfd, AF_UNIX, SOCK_STREAM, PF_UNSPEC
+        or die "failed to create socketpair: $!\n";
+
+    my $child = fork();
+    die "fork failed: $!\n" if !defined($child);
+
+    if (!$child) {
+        eval {
+            close $pfd;
+            &$do_syncfs($vmid, $pid, $cfd);
+        };
+        if (my $err = $@) {
+            warn $err;
+            POSIX::_exit(1);
+        }
+        POSIX::_exit(0);
+    }
+    close $cfd;
+
+    # Wait for the child and return its exit status. Retry only when the call
+    # was interrupted; any other waitpid failure yields -1 instead of looping
+    # forever.
+    my $reap_child = sub {
+        while (1) {
+            my $res = waitpid($child, 0);
+            return $? if $res == $child;
+            return -1 if $res == -1 && $! != EINTR;
+        }
+    };
+
+    my $go = <$pfd>;
+    if (!defined($go) || $go ne "go\n") {
+        # EOF (undef) means the child went away before entering the namespace
+        close $pfd;
+        $reap_child->();
+        die "failed to enter container namespace\n";
+    }
+
+    my $mountdata = eval {
+        open my $mounts, '<', "/proc/$child/mounts"
+            or die "failed to open container's /proc/mounts: $!\n";
+        local $/ = undef;
+        my $data = <$mounts>;
+        close $mounts;
+        $data;
+    };
+    my $err = $@;
+
+    print {$pfd} $mountdata if !$err;
+    # Closing our end gives the child EOF, so it stops reading either way
+    close $pfd;
+
+    my $status = $reap_child->();
+    die $err if $err;
+    die "failed to sync container namespace\n" if $status != 0;
+}
+
sub snapshot_create {
my ($vmid, $snapname, $comment) = @_;
if ($running) {
$unfreeze = 1;
PVE::Tools::run_command(['/usr/bin/lxc-freeze', '-n', $vmid]);
- PVE::Tools::run_command(['/bin/sync']);
+ sync_container_namespace($vmid);
};
my $storecfg = PVE::Storage::config();
write_config($vmid, $conf);
};
- lock_container($vmid, 10, $updatefn);
+ lock_config($vmid, $updatefn);
my $storecfg = PVE::Storage::config();
my $err = $@;
if(!$err || ($err && $force)) {
- lock_container($vmid, 10, $del_snap);
+ lock_config($vmid, $del_snap);
if ($err) {
die "Can't delete snapshot: $vmid $snapname $err\n";
}
write_config($vmid, $conf);
};
- lock_container($vmid, 10, $updatefn);
+ lock_config($vmid, $updatefn);
PVE::Storage::volume_snapshot_rollback($storecfg, $volid, $snapname);
- lock_container($vmid, 5, $unlockfn);
+ lock_config($vmid, $unlockfn);
}
sub template_create {
return $found;
}
+# Run a function with a file attached to a loop device.
+# The loop device is always detached afterwards (or set to autoclear).
+#
+# $func is called with the loop device path as its only argument.
+# Returns the loop device path; by the time this returns, the detach has
+# already been requested.
+# Dies if the loop device cannot be set up, if $func dies (its error is
+# re-thrown after the detach attempt) or if detaching fails.
+sub run_with_loopdev {
+    my ($func, $file) = @_;
+    my $device;
+    my $parser = sub {
+        my $line = shift;
+        if ($line =~ m@^(/dev/loop\d+)$@) {
+            $device = $1;
+        }
+    };
+    PVE::Tools::run_command(['losetup', '--show', '-f', $file], outfunc => $parser);
+    die "failed to setup loop device for $file\n" if !$device;
+
+    eval { &$func($device); };
+    my $err = $@;
+
+    # Detach inside an eval: a failing detach must not hide the error raised
+    # by $func, which is the more useful one for the caller.
+    eval { PVE::Tools::run_command(['losetup', '-d', $device]); };
+    if (my $detach_err = $@) {
+        die $detach_err if !$err;
+        warn $detach_err;
+    }
+
+    die $err if $err;
+    return $device;
+}
+
+# Bind mount $dir onto $dest, handing @extra_opts to the mount command.
+# With a true $ro the bind mount is remounted read-only in a second step;
+# if that step fails the mount is removed again and the error is re-thrown.
+sub bindmount {
+    my ($dir, $dest, $ro, @extra_opts) = @_;
+
+    my $bind_cmd = ['mount', '-o', 'bind', @extra_opts, $dir, $dest];
+    PVE::Tools::run_command($bind_cmd);
+
+    if ($ro) {
+        my $remount_cmd = ['mount', '-o', 'bind,remount,ro', $dest];
+        eval { PVE::Tools::run_command($remount_cmd); };
+        if (my $remount_err = $@) {
+            warn "bindmount error\n";
+            # don't leave writable bind-mounts behind...
+            PVE::Tools::run_command(['umount', $dest]);
+            die $remount_err;
+        }
+    }
+}
+
# use $rootdir = undef to just return the corresponding mount path
sub mountpoint_mount {
my ($mountpoint, $rootdir, $storage_cfg, $snapname) = @_;
my $volid = $mountpoint->{volume};
my $mount = $mountpoint->{mp};
my $type = $mountpoint->{type};
+ my $quota = !$snapname && !$mountpoint->{ro} && $mountpoint->{quota};
+ my $mounted_dev;
return if !$volid || !$mount;
if (defined($mountpoint->{acl})) {
$optstring .= ($mountpoint->{acl} ? 'acl' : 'noacl');
}
- if ($mountpoint->{ro}) {
- $optstring .= ',' if $optstring;
- $optstring .= 'ro';
- }
+ my $readonly = $mountpoint->{ro};
my @extra_opts = ('-o', $optstring);
die "cannot mount subvol snapshots for storage type '$scfg->{type}'\n";
}
} else {
- if ($mountpoint->{ro}) {
- die "read-only bind mounts not supported\n";
- }
- PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $path, $mount_path]);
+ bindmount($path, $mount_path, $readonly, @extra_opts);
+ warn "cannot enable quota control for bind mounted subvolumes\n" if $quota;
}
}
- return wantarray ? ($path, 0) : $path;
+ return wantarray ? ($path, 0, $mounted_dev) : $path;
} elsif ($format eq 'raw' || $format eq 'iso') {
+ my $domount = sub {
+ my ($path) = @_;
+ if ($mount_path) {
+ if ($format eq 'iso') {
+ PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
+ } elsif ($isBase || defined($snapname)) {
+ PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
+ } else {
+ if ($quota) {
+ push @extra_opts, '-o', 'usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0';
+ }
+ push @extra_opts, '-o', 'ro' if $readonly;
+ PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
+ }
+ }
+ };
my $use_loopdev = 0;
if ($scfg->{path}) {
- push @extra_opts, '-o', 'loop';
+ $mounted_dev = run_with_loopdev($domount, $path);
$use_loopdev = 1;
} elsif ($scfg->{type} eq 'drbd' || $scfg->{type} eq 'lvm' ||
$scfg->{type} eq 'rbd' || $scfg->{type} eq 'lvmthin') {
- # do nothing
+ $mounted_dev = $path;
+ &$domount($path);
} else {
die "unsupported storage type '$scfg->{type}'\n";
}
- if ($mount_path) {
- if ($format eq 'iso') {
- PVE::Tools::run_command(['mount', '-o', 'ro', @extra_opts, $path, $mount_path]);
- } elsif ($isBase || defined($snapname)) {
- PVE::Tools::run_command(['mount', '-o', 'ro,noload', @extra_opts, $path, $mount_path]);
- } else {
- PVE::Tools::run_command(['mount', @extra_opts, $path, $mount_path]);
- }
- }
- return wantarray ? ($path, $use_loopdev) : $path;
+ return wantarray ? ($path, $use_loopdev, $mounted_dev) : $path;
} else {
die "unsupported image format '$format'\n";
}
} elsif ($type eq 'device') {
+ push @extra_opts, '-o', 'ro' if $readonly;
PVE::Tools::run_command(['mount', @extra_opts, $volid, $mount_path]) if $mount_path;
- return wantarray ? ($volid, 0) : $volid;
+ return wantarray ? ($volid, 0, $volid) : $volid;
} elsif ($type eq 'bind') {
- if ($mountpoint->{ro}) {
- die "read-only bind mounts not supported\n";
- # Theoretically we'd have to execute both:
- # mount -o bind $a $b
- # mount -o bind,remount,ro $a $b
- }
die "directory '$volid' does not exist\n" if ! -d $volid;
&$check_mount_path($volid);
- PVE::Tools::run_command(['mount', '-o', 'bind', @extra_opts, $volid, $mount_path]) if $mount_path;
- return wantarray ? ($volid, 0) : $volid;
+ bindmount($volid, $mount_path, $readonly, @extra_opts) if $mount_path;
+ warn "cannot enable quota control for bind mounts\n" if $quota;
+ return wantarray ? ($volid, 0, undef) : $volid;
}
die "unsupported storage";