Add device passthrough

[pve-container.git] / src / PVE / LXC.pm
diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm

index 37f184d046c9832e6f1674b33ceff987da3e4ae4..259fcb25b4c30514adfba71e4650164dee2fb526 100644 (file)
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -4,8 +4,8 @@ use strict;
  use warnings;
  
  use Cwd qw();
-use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED ENOSYS EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
+use Errno qw(ELOOP ENOENT ENOTDIR EROFS ECONNREFUSED EEXIST);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
  use File::Path;
  use File::Spec;
  use IO::Poll qw(POLLIN POLLHUP);
@@ -639,6 +639,29 @@ sub update_lxc_config {
         $raw .= "lxc.mount.auto = sys:mixed\n";
      }
  
+    # Emit a device cgroup allow rule for every passthrough device found in $conf.
+    # Dies if a device has no path, cannot be stat()ed, or is neither a block nor a
+    # character device.
+    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
+       my ($key, $device) = @_;
+
+       die "Path is not defined for passthrough device $key"
+           unless (defined($device->{path}));
+
+       my $absolute_path = $device->{path};
+       # A failed stat() returns an empty list, so both values stay undef in that case.
+       my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+       # Only look at $! once we know stat() failed: after a successful call it may
+       # still hold a stale errno (e.g. ENOENT) from an unrelated earlier operation.
+       if (!defined($mode) || !defined($rdev)) {
+           die "Device $absolute_path does not exist\n" if $! == ENOENT;
+           die "Error accessing device $absolute_path\n";
+       }
+
+       die "$absolute_path is not a device\n"
+           if (!S_ISBLK($mode) && !S_ISCHR($mode));
+
+       my $major = PVE::Tools::dev_t_major($rdev);
+       my $minor = PVE::Tools::dev_t_minor($rdev);
+       my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+       # Allow read/write access to exactly this major:minor device node.
+       $raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
+    });
+
      # WARNING: DO NOT REMOVE this without making sure that loop device nodes
      # cannot be exposed to the container with r/w access (cgroup perms).
      # When this is enabled mounts will still remain in the monitor's namespace
@@ -935,7 +958,7 @@ sub net_tap_plug : prototype($$) {
  
      if ($have_sdn) {
         PVE::Network::SDN::Zones::tap_plug($iface, $bridge, $tag, $firewall, $trunks, $rate);
-       PVE::Network::SDN::Zones::add_bridge_fdb($iface, $hwaddr, $bridge, $firewall);
+       PVE::Network::SDN::Zones::add_bridge_fdb($iface, $hwaddr, $bridge);
      } else {
         PVE::Network::tap_plug($iface, $bridge, $tag, $firewall, $trunks, $rate, { mac => $hwaddr });
      }
@@ -1036,6 +1059,32 @@ sub hotplug_net {
      PVE::LXC::Config->write_config($vmid, $conf);
  }
  
+# Collect the network interfaces visible inside a running container.
+#
+# Runs `ip --json a` in the network namespace of the PID returned by find_lxc_pid()
+# and returns an array ref with one hash per interface:
+#   name     - the interface name
+#   hwaddr   - the link-layer address reported by `ip`
+#   <family> - "<address>/<prefixlen>", keyed by the address family reported by `ip`
+#
+# If an interface has several addresses of the same family, only the last one reported
+# is kept. Returns nothing (undef in scalar context) if find_lxc_pid() fails for $vmid.
+sub get_interfaces {
+    my ($vmid) = @_;
+
+    my $pid = eval { find_lxc_pid($vmid); };
+    return if $@;
+
+    my $output = '';
+    # enters the network namespace of the container and executes 'ip a'
+    run_command(['nsenter', '-t', $pid, '--net', '--', 'ip', '--json', 'a'],
+       outfunc => sub { $output .= shift; });
+
+    my $config = JSON::decode_json($output);
+
+    # Start with an empty list so callers always get an array ref for a running
+    # container, even if `ip` reports no interface at all.
+    my $res = [];
+    for my $interface ($config->@*) {
+       my $obj = { name => $interface->{ifname} };
+       for my $ip ($interface->{addr_info}->@*) {
+           $obj->{$ip->{family}} = $ip->{local} . "/" . $ip->{prefixlen};
+       }
+       $obj->{hwaddr} = $interface->{address};
+       push @$res, $obj;
+    }
+
+    return $res;
+}
+
  sub update_ipconfig {
      my ($vmid, $conf, $opt, $eth, $newnet, $rootdir) = @_;
  
@@ -1314,10 +1363,14 @@ sub check_ct_modify_config_perm {
             }
         } elsif ($opt eq 'memory' || $opt eq 'swap') {
             $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Memory']);
-       } elsif ($opt =~ m/^net\d+$/ || $opt eq 'nameserver' ||
-                $opt eq 'searchdomain' || $opt eq 'hostname') {
+       } elsif ($opt =~ m/^net\d+$/) {
+           $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
+           check_bridge_access($rpcenv, $authuser, $oldconf->{$opt}) if $oldconf->{$opt};
+           check_bridge_access($rpcenv, $authuser, $newconf->{$opt}) if $newconf->{$opt};
+       } elsif ($opt =~ m/^dev\d+$/) {
+           raise_perm_exc("configuring device passthrough is only allowed for root\@pam");
+       } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' || $opt eq 'hostname') {
             $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
-           PVE::LXC::check_bridge_access($rpcenv, $authuser, $newconf->{$opt});
         } elsif ($opt eq 'features') {
             raise_perm_exc("changing feature flags for privileged container is only allowed for root\@pam")
                 if !$unprivileged;
@@ -1679,7 +1732,6 @@ sub mountpoint_stage {
         __mountpoint_mount($mountpoint, $stage_dir, $storage_cfg, $snapname, $rootuid, $rootgid, 1);
  
      if (!defined($path)) {
-       return undef if $! == ENOSYS;
         die "failed to mount subvolume: $!\n";
      }
  
@@ -1714,16 +1766,9 @@ sub mountpoint_insert_staged {
  
  # Use $stage_mount, $rootdir is treated as a temporary path to "stage" the file system. The user
  #   can then open a file descriptor to it which can be used with the `move_mount` syscall.
-#   Note that if the kernel does not support the new mount API, this will not perform any action
-#   and return `undef` with $! = ENOSYS.
  sub __mountpoint_mount {
      my ($mountpoint, $rootdir, $storage_cfg, $snapname, $rootuid, $rootgid, $stage_mount) = @_;
  
-    if (defined($stage_mount) && !PVE::LXC::Tools::can_use_new_mount_api()) {
-       $! = ENOSYS;
-       return undef;
-    }
-
      # When staging mount points we always mount to $rootdir directly (iow. as if `mp=/`).
      # This is required since __mount_prepare_rootdir() will return handles to the parent directory
      # which we use in __bindmount_verify()!
@@ -1931,7 +1976,7 @@ sub get_staging_mount_path($) {
      return $target;
  }
  
-# Mount /run/pve/mountpoints as tmpfs
+# Mount tmpfs for mount point staging and return the path.
  sub get_staging_tempfs() {
      # We choose a path in /var/lib/lxc/ here because the lxc-start apparmor profile restricts most
      # mounts to that.
@@ -2373,6 +2418,34 @@ sub validate_id_maps {
      }
  }
  
+# Translate a container-side ID into the corresponding host-side ID.
+#
+# $id_map is an array ref of [type, container_start, host_start, length] entries and
+# $id_type selects which entries apply (compared against the entry's type field).
+# The first matching entry whose container range contains $id wins. If no entry
+# matches, $id is returned unchanged.
+sub map_ct_id_to_host {
+    my ($id, $id_map, $id_type) = @_;
+
+    for my $entry (@$id_map) {
+       my ($kind, $ct_start, $host_start, $count) = @$entry;
+
+       # ignore entries of the other kind and ranges that do not contain $id
+       next if $kind ne $id_type;
+       next if $id < $ct_start || $id >= ($ct_start + $count);
+
+       return $host_start - $ct_start + $id;
+    }
+
+    return $id;
+}
+
+# Translate a container UID to the host UID using the 'u' entries of $id_map.
+sub map_ct_uid_to_host {
+    my ($uid, $id_map) = @_;
+
+    my $host_uid = map_ct_id_to_host($uid, $id_map, 'u');
+    return $host_uid;
+}
+
+# Translate a container GID to the host GID using the 'g' entries of $id_map.
+sub map_ct_gid_to_host {
+    my ($gid, $id_map) = @_;
+
+    my $host_gid = map_ct_id_to_host($gid, $id_map, 'g');
+    return $host_gid;
+}
+
  sub userns_command {
      my ($id_map) = @_;
      if (@$id_map) {