safe_read_from: bump default size limit to 1 MiB to match pmxcfs

[pve-common.git] / src / PVE / Tools.pm
diff --git a/src/PVE/Tools.pm b/src/PVE/Tools.pm

index dd9cd0f8e5f0e4d0aed19ba3bb52a2cce4c2c723..a1571ea3d9fa8c05746fea45ad7c525b197d5c6d 100644 (file)
--- a/src/PVE/Tools.pm
+++ b/src/PVE/Tools.pm
@@ -2,30 +2,32 @@ package PVE::Tools;
  
  use strict;
  use warnings;
-use POSIX qw(EINTR EEXIST EOPNOTSUPP);
-use IO::Socket::IP;
-use Socket qw(AF_INET AF_INET6 AI_ALL AI_V4MAPPED AI_CANONNAME SOCK_DGRAM);
-use IO::Select;
+
+use Date::Format qw(time2str);
+use Digest::MD5;
+use Digest::SHA;
+use Encode;
+use Fcntl qw(:DEFAULT :flock);
  use File::Basename;
  use File::Path qw(make_path);
  use Filesys::Df (); # don't overwrite our df()
-use IO::Pipe;
-use IO::File;
  use IO::Dir;
+use IO::File;
  use IO::Handle;
+use IO::Pipe;
+use IO::Select;
+use IO::Socket::IP;
  use IPC::Open3;
-use Fcntl qw(:DEFAULT :flock);
-use base 'Exporter';
-use URI::Escape;
-use Encode;
-use Digest::SHA;
-use Text::ParseWords;
+use JSON;
+use POSIX qw(EINTR EEXIST EOPNOTSUPP);
+use Scalar::Util 'weaken';
+use Socket qw(AF_INET AF_INET6 AI_ALL AI_V4MAPPED AI_CANONNAME SOCK_DGRAM IPPROTO_TCP);
  use String::ShellQuote;
+use Text::ParseWords;
  use Time::HiRes qw(usleep gettimeofday tv_interval alarm);
-use Net::DBus qw(dbus_uint32 dbus_uint64);
-use Net::DBus::Callback;
-use Net::DBus::Reactor;
-use Scalar::Util 'weaken';
+use URI::Escape;
+use base 'Exporter';
+
  use PVE::Syscall;
  
  # avoid warning when parsing long hex values with hex()
@@ -47,9 +49,19 @@ template_replace
  safe_print
  trim
  extract_param
+extract_sensitive_params
  file_copy
+get_host_arch
  O_PATH
  O_TMPFILE
+AT_EMPTY_PATH
+AT_FDCWD
+CLONE_NEWNS
+CLONE_NEWUTS
+CLONE_NEWIPC
+CLONE_NEWUSER
+CLONE_NEWPID
+CLONE_NEWNET
  );
  
  my $pvelogdir = "/var/log/pve";
@@ -76,6 +88,9 @@ our $IPV6RE = "(?:" .
  
  our $IPRE = "(?:$IPV4RE|$IPV6RE)";
  
+our $EMAIL_USER_RE = qr/[\w\+\-\~]+(\.[\w\+\-\~]+)*/;
+our $EMAIL_RE = qr/$EMAIL_USER_RE@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)*/;
+
  use constant {CLONE_NEWNS   => 0x00020000,
                CLONE_NEWUTS  => 0x04000000,
                CLONE_NEWIPC  => 0x08000000,
@@ -84,8 +99,17 @@ use constant {CLONE_NEWNS   => 0x00020000,
                CLONE_NEWNET  => 0x40000000};
  
  use constant {O_PATH    => 0x00200000,
+              O_CLOEXEC => 0x00080000,
                O_TMPFILE => 0x00410000}; # This includes O_DIRECTORY
  
+use constant {AT_EMPTY_PATH => 0x1000,
+              AT_FDCWD => -100};
+
+# from <linux/fs.h>
+use constant {RENAME_NOREPLACE => (1 << 0),
+              RENAME_EXCHANGE  => (1 << 1),
+              RENAME_WHITEOUT  => (1 << 2)};
+
  sub run_with_timeout {
      my ($timeout, $code, @param) = @_;
  
@@ -148,7 +172,7 @@ sub lock_file_full {
             or die "can't open file - $!\n";
  
         if (!flock($fh, $mode|LOCK_NB)) {
-           print STDERR "trying to acquire lock...";
+           print STDERR "trying to acquire lock...\n";
             my $success;
             while(1) {
                 $success = flock($fh, $mode);
@@ -277,7 +301,8 @@ sub file_read_firstline {
  sub safe_read_from {
      my ($fh, $max, $oneline, $filename) = @_;
  
-    $max = 32768 if !$max;
+    # pmxcfs file size limit
+    $max = 1024 * 1024 if !$max;
  
      my $subject = defined($filename) ? "file '$filename'" : 'input';
  
@@ -367,6 +392,7 @@ sub run_command {
      my $afterfork;
      my $noerr;
      my $keeplocale;
+    my $quiet;
  
      eval {
  
@@ -393,6 +419,8 @@ sub run_command {
                 $noerr = $param{$p};
             } elsif ($p eq 'keeplocale') {
                 $keeplocale = $param{$p};
+           } elsif ($p eq 'quiet') {
+               $quiet = $param{$p};
             } else {
                 die "got unknown parameter '$p' for run_command\n";
             }
@@ -426,13 +454,12 @@ sub run_command {
  
             $pid = open3($writer, $reader, $error, @$cmd) || die $!;
  
-           # if we pipe fron STDIN, open3 closes STDIN, so we we
-           # a perl warning "Filehandle STDIN reopened as GENXYZ .. "
-           # as soon as we open a new file.
+           # if we pipe fron STDIN, open3 closes STDIN, so we get a perl warning like
+           # "Filehandle STDIN reopened as GENXYZ .. " as soon as we open a new file.
             # to avoid that we open /dev/null
             if (!ref($writer) && !defined(fileno(STDIN))) {
                 POSIX::close(0);
-               open(STDIN, "</dev/null");
+               open(STDIN, '<', '/dev/null');
             }
         };
  
@@ -457,7 +484,7 @@ sub run_command {
             close $writer;
         }
  
-       my $select = new IO::Select;
+       my $select = IO::Select->new();
         $select->add($reader) if ref($reader);
         $select->add($error);
  
@@ -482,12 +509,13 @@ sub run_command {
                 if ($h eq $reader) {
                     if ($outfunc || $logfunc) {
                         eval {
-                           $outlog .= $buf;
-                           while ($outlog =~ s/^([^\010\r\n]*)(\r|\n|(\010)+|\r\n)//s) {
-                               my $line = $1;
+                           while ($buf =~ s/^([^\010\r\n]*)(?:\n|(?:\010)+|\r\n?)//) {
+                               my $line = $outlog . $1;
+                               $outlog = '';
                                 &$outfunc($line) if $outfunc;
                                 &$logfunc($line) if $logfunc;
                             }
+                           $outlog .= $buf;
                         };
                         my $err = $@;
                         if ($err) {
@@ -495,19 +523,20 @@ sub run_command {
                             waitpid ($pid, 0);
                             die $err;
                         }
-                   } else {
+                   } elsif (!$quiet) {
                         print $buf;
                         *STDOUT->flush();
                     }
                 } elsif ($h eq $error) {
                     if ($errfunc || $logfunc) {
                         eval {
-                           $errlog .= $buf;
-                           while ($errlog =~ s/^([^\010\r\n]*)(\r|\n|(\010)+|\r\n)//s) {
-                               my $line = $1;
+                           while ($buf =~ s/^([^\010\r\n]*)(?:\n|(?:\010)+|\r\n?)//) {
+                               my $line = $errlog . $1;
+                               $errlog = '';
                                 &$errfunc($line) if $errfunc;
                                 &$logfunc($line) if $logfunc;
                             }
+                           $errlog .= $buf;
                         };
                         my $err = $@;
                         if ($err) {
@@ -515,7 +544,7 @@ sub run_command {
                             waitpid ($pid, 0);
                             die $err;
                         }
-                   } else {
+                   } elsif (!$quiet) {
                         print STDERR $buf;
                         *STDERR->flush();
                     }
@@ -579,8 +608,66 @@ sub run_command {
      return $exitcode;
  }
  
+# Run a command with a tcp socket as standard input.
+sub pipe_socket_to_command  {
+    my ($cmd, $ip, $port) = @_;
+
+    my $params = {
+       Listen => 1,
+       ReuseAddr => 1,
+       Proto => &Socket::IPPROTO_TCP,
+       GetAddrInfoFlags => 0,
+       LocalAddr => $ip,
+       LocalPort => $port,
+    };
+    my $socket = IO::Socket::IP->new(%$params) or die "failed to open socket: $!\n";
+
+    print "$ip\n$port\n"; # tell remote where to connect
+    *STDOUT->flush();
+
+    alarm 0;
+    local $SIG{ALRM} = sub { die "timed out waiting for client\n" };
+    alarm 30;
+    my $client = $socket->accept; # Wait for a client
+    alarm 0;
+    close($socket);
+
+    # We want that the command talks over the TCP socket and takes
+    # ownership of it, so that when it closes it the connection is
+    # terminated, so we need to be able to close the socket. So we
+    # can't really use PVE::Tools::run_command().
+    my $pid = fork() // die "fork failed: $!\n";
+    if (!$pid) {
+       POSIX::dup2(fileno($client), 0);
+       POSIX::dup2(fileno($client), 1);
+       close($client);
+       exec {$cmd->[0]} @$cmd or do {
+           warn "exec failed: $!\n";
+           POSIX::_exit(1);
+       };
+    }
+
+    close($client);
+    if (waitpid($pid, 0) != $pid) {
+       kill(15 => $pid); # if we got interrupted terminate the child
+       my $count = 0;
+       while (waitpid($pid, POSIX::WNOHANG) != $pid) {
+           usleep(100000);
+           $count++;
+           kill(9 => $pid), last if $count > 300; # 30 second timeout
+       }
+    }
+    if (my $sig = ($? & 127)) {
+       die "got signal $sig\n";
+    } elsif (my $exitcode = ($? >> 8)) {
+       die "exit code $exitcode\n";
+    }
+
+    return undef;
+}
+
  sub split_list {
-    my $listtxt = shift || '';
+    my $listtxt = shift // '';
  
      return split (/\0/, $listtxt) if $listtxt =~ m/\0/;
  
@@ -610,7 +697,7 @@ sub template_replace {
      return $tmpl if !$tmpl;
  
      my $res = '';
-    while ($tmpl =~ m/([^{]+)?({([^}]+)})?/g) {
+    while ($tmpl =~ m/([^{]+)?(\{([^}]+)\})?/g) {
         $res .= $1 if $1;
         $res .= ($data->{$3} || '-') if $2;
      }
@@ -700,7 +787,7 @@ my $keymaphash =  {
      'pl'     => ['Polish', 'pl', 'qwerty/pl.kmap.gz', 'pl', undef],
      'pt'     => ['Portuguese', 'pt', 'qwerty/pt-latin1.kmap.gz', 'pt', 'nodeadkeys'],
      'pt-br'  => ['Brazil-Portuguese', 'pt-br', 'qwerty/br-latin1.kmap.gz', 'br', 'nodeadkeys'],
-    #'ru'     => ['Russian', 'ru', 'qwerty/ru.kmap.gz', 'ru', undef], # dont know?
+    #'ru'     => ['Russian', 'ru', 'qwerty/ru.kmap.gz', 'ru', undef], # don't know?
      'si'     => ['Slovenian', 'sl', 'qwertz/slovene.kmap.gz', 'si', undef],
      'se'     => ['Swedish', 'sv', 'qwerty/se-latin1.kmap.gz', 'se', 'nodeadkeys'],
      #'th'     => [],
@@ -729,36 +816,59 @@ sub extract_param {
      return $res;
  }
  
+# For extracting sensitive keys (e.g. password), to avoid writing them to www-data owned configs
+sub extract_sensitive_params :prototype($$$) {
+    my ($param, $sensitive_list, $delete_list) = @_;
+
+    my %delete = map { $_ => 1 } ($delete_list || [])->@*;
+
+    my $sensitive = {};
+    for my $opt (@$sensitive_list) {
+       # handle deletions as explicitly setting `undef`, so subs which only have $param but not
+       # $delete_list available can recognize them. Afterwards new values  may override.
+       if (exists($delete{$opt})) {
+           $sensitive->{$opt} = undef;
+       }
+
+       if (defined(my $value = extract_param($param, $opt))) {
+           $sensitive->{$opt} = $value;
+       }
+    }
+
+    return $sensitive;
+}
+
  # Note: we use this to wait until vncterm/spiceterm is ready
  sub wait_for_vnc_port {
-    my ($port, $timeout) = @_;
+    my ($port, $family, $timeout) = @_;
  
      $timeout = 5 if !$timeout;
      my $sleeptime = 0;
      my $starttime = [gettimeofday];
      my $elapsed;
  
+    my $cmd = ['/bin/ss', '-Htln', "sport = :$port"];
+    push @$cmd, $family == AF_INET6 ? '-6' : '-4' if defined($family);
+
+    my $found;
      while (($elapsed = tv_interval($starttime)) < $timeout) {
-       if (my $fh = IO::File->new ("/proc/net/tcp", "r")) {
-           while (defined (my $line = <$fh>)) {
-               if ($line =~ m/^\s*\d+:\s+([0-9A-Fa-f]{8}):([0-9A-Fa-f]{4})\s/) {
-                   if ($port == hex($2)) {
-                       close($fh);
-                       return 1;
-                   }
-               }
+       # -Htln = don't print header, tcp, listening sockets only, numeric ports
+       run_command($cmd, outfunc => sub {
+           my $line = shift;
+           if ($line =~ m/^LISTEN\s+\d+\s+\d+\s+\S+:(\d+)\s/) {
+               $found = 1 if ($port == $1);
             }
-           close($fh);
-       }
+       });
+       return 1 if $found;
         $sleeptime += 100000 if  $sleeptime < 1000000;
         usleep($sleeptime);
      }
  
-    return undef;
+    die "Timeout while waiting for port '$port' to get ready!\n";
  }
  
  sub next_unused_port {
-    my ($range_start, $range_end, $family) = @_;
+    my ($range_start, $range_end, $family, $address) = @_;
  
      # We use a file to register allocated ports.
      # Those registrations expires after $expiretime.
@@ -786,16 +896,18 @@ sub next_unused_port {
         }
  
         my $newport;
+       my %sockargs = (Listen => 5,
+                       ReuseAddr => 1,
+                       Family    => $family,
+                       Proto     => IPPROTO_TCP,
+                       GetAddrInfoFlags => 0);
+       $sockargs{LocalAddr} = $address if defined($address);
  
         for (my $p = $range_start; $p < $range_end; $p++) {
             next if $ports->{$p}; # reserved
  
-           my $sock = IO::Socket::IP->new(Listen => 5,
-                                          LocalPort => $p,
-                                          ReuseAddr => 1,
-                                          Family    => $family,
-                                          Proto     => 0,
-                                          GetAddrInfoFlags => 0);
+           $sockargs{LocalPort} = $p;
+           my $sock = IO::Socket::IP->new(%sockargs);
  
             if ($sock) {
                 close($sock);
@@ -824,71 +936,149 @@ sub next_unused_port {
  }
  
  sub next_migrate_port {
-    my ($family) = @_;
-    return next_unused_port(60000, 60050, $family);
+    my ($family, $address) = @_;
+    return next_unused_port(60000, 60050, $family, $address);
  }
  
  sub next_vnc_port {
-    my ($family) = @_;
-    return next_unused_port(5900, 6000, $family);
+    my ($family, $address) = @_;
+    return next_unused_port(5900, 6000, $family, $address);
+}
+
+sub spice_port_range {
+    return (61000, 61999);
  }
  
  sub next_spice_port {
-    my ($family) = @_;
-    return next_unused_port(61000, 61099, $family);
+    my ($family, $address) = @_;
+    return next_unused_port(spice_port_range(), $family, $address);
  }
  
-# NOTE: NFS syscall can't be interrupted, so alarm does
-# not work to provide timeouts.
-# from 'man nfs': "Only SIGKILL can interrupt a pending NFS operation"
-# So fork() before using Filesys::Df
-sub df {
-    my ($path, $timeout) = @_;
+sub must_stringify {
+       my ($value) = @_;
+       eval { $value = "$value" };
+       return "error turning value into a string: $@" if $@;
+       return $value;
+}
  
-    my $res = {
-       total => 0,
-       used => 0,
-       avail => 0,
-    };
+# sigkill after $timeout  a $sub running in a fork if it can't write a pipe
+# the $sub has to return a single scalar
+sub run_fork_with_timeout {
+    my ($timeout, $sub) = @_;
+
+    my $res;
+    my $error;
+    my $pipe_out = IO::Pipe->new();
+
+    # disable pending alarms, save their remaining time
+    my $prev_alarm = alarm 0;
+
+    # avoid leaving a zombie if the parent gets interrupted
+    my $sig_received;
  
-    my $pipe = IO::Pipe->new();
      my $child = fork();
      if (!defined($child)) {
-       warn "fork failed: $!\n";
+       die "fork failed: $!\n";
         return $res;
      }
  
      if (!$child) {
-       $pipe->writer();
+       $pipe_out->writer();
+
         eval {
-           my $df = Filesys::Df::df($path, 1);
-           print {$pipe} "$df->{blocks}\n$df->{used}\n$df->{bavail}\n";
-           $pipe->close();
+           $res = $sub->();
+           print {$pipe_out} encode_json({ result => $res });
+           $pipe_out->flush();
         };
         if (my $err = $@) {
-           warn $err;
+           print {$pipe_out} encode_json({ error => must_stringify($err) });
+           $pipe_out->flush();
             POSIX::_exit(1);
         }
         POSIX::_exit(0);
      }
  
-    $pipe->reader();
+    local $SIG{INT} = sub { $sig_received++; };
+    local $SIG{TERM} = sub {
+       $error //= "interrupted by unexpected signal\n";
+       kill('TERM', $child);
+    };
+
+    $pipe_out->reader();
  
      my $readvalues = sub {
-       $res->{total} = int((<$pipe> =~ /^(\d*)$/)[0]);
-       $res->{used}  = int((<$pipe> =~ /^(\d*)$/)[0]);
-       $res->{avail} = int((<$pipe> =~ /^(\d*)$/)[0]);
+       local $/ = undef;
+       my $child_res = decode_json(readline_nointr($pipe_out));
+       $res = $child_res->{result};
+       $error = $child_res->{error};
      };
      eval {
-       run_with_timeout($timeout, $readvalues);
+       if (defined($timeout)) {
+           run_with_timeout($timeout, $readvalues);
+       } else {
+           $readvalues->();
+       }
      };
      warn $@ if $@;
-    $pipe->close();
+    $pipe_out->close();
      kill('KILL', $child);
      waitpid($child, 0);
+
+    alarm $prev_alarm;
+    die "interrupted by unexpected signal\n" if $sig_received;
+
+    die $error if $error;
      return $res;
  }
  
+sub run_fork {
+    my ($code) = @_;
+    return run_fork_with_timeout(undef, $code);
+}
+
+# NOTE: NFS syscall can't be interrupted, so alarm does
+# not work to provide timeouts.
+# from 'man nfs': "Only SIGKILL can interrupt a pending NFS operation"
+# So fork() before using Filesys::Df
+sub df {
+    my ($path, $timeout) = @_;
+
+    my $df = sub { return Filesys::Df::df($path, 1) };
+
+    my $res = eval { run_fork_with_timeout($timeout, $df) } // {};
+    warn $@ if $@;
+
+    # untaint, but be flexible: PB usage can result in scientific notation
+    my ($blocks, $used, $bavail) = map { defined($_) ? (/^([\d\.e\-+]+)$/) : 0 }
+       $res->@{qw(blocks used bavail)};
+
+    return {
+       total => $blocks,
+       used => $used,
+       avail => $bavail,
+    };
+}
+
+sub du {
+    my ($path, $timeout) = @_;
+
+    my $size;
+
+    $timeout //= 10;
+
+    my $parser = sub {
+       my $line = shift;
+
+       if ($line =~ m/^(\d+)\s+total$/) {
+           $size = $1;
+       }
+    };
+
+    run_command(['du', '-scb', $path], outfunc => $parser, timeout => $timeout);
+
+    return $size;
+}
+
  # UPID helper
  # We use this to uniquely identify a process.
  # An 'Unique Process ID' has the following format:
@@ -971,6 +1161,8 @@ sub upid_read_status {
             return 'OK';
         } elsif ($line =~ m/^TASK ERROR: (.+)$/) {
             return $1;
+       } elsif ($line =~ m/^TASK (WARNINGS: \d+)$/) {
+           return $1;
         } else {
             return "unexpected status";
         }
@@ -978,6 +1170,31 @@ sub upid_read_status {
      return "unable to read tail (got $br bytes)";
  }
  
+# Check if the status returned by upid_read_status is an error status.
+# If the status could not be parsed it's also treated as an error.
+sub upid_status_is_error {
+    my ($status) = @_;
+
+    return !($status eq 'OK' || $status =~ m/^WARNINGS: \d+$/);
+}
+
+# takes the parsed status and returns the type, either ok, warning, error or unknown
+sub upid_normalize_status_type {
+    my ($status) = @_;
+
+    if (!$status) {
+       return 'unknown';
+    } elsif ($status eq 'OK') {
+       return 'ok';
+    } elsif ($status =~ m/^WARNINGS: \d+$/) {
+       return 'warning';
+    } elsif ($status eq 'unexpected status') {
+       return 'unknown';
+    } else {
+       return 'error';
+    }
+}
+
  # useful functions to store comments in config files
  sub encode_text {
      my ($text) = @_;
@@ -1203,8 +1420,10 @@ sub unpack_sockaddr_in46 {
  
  sub getaddrinfo_all {
      my ($hostname, @opts) = @_;
-    my %hints = ( flags => AI_V4MAPPED | AI_ALL,
-                  @opts );
+    my %hints = (
+       flags => AI_V4MAPPED | AI_ALL,
+       @opts,
+    );
      my ($err, @res) = Socket::getaddrinfo($hostname, '0', \%hints);
      die "failed to get address info for: $hostname: $err\n" if $err;
      return @res;
@@ -1247,6 +1466,11 @@ sub parse_host_and_port {
      return; # nothing
  }
  
+sub setresuid($$$) {
+    my ($ruid, $euid, $suid) = @_;
+    return 0 == syscall(PVE::Syscall::setresuid, $ruid, $euid, $suid);
+}
+
  sub unshare($) {
      my ($flags) = @_;
      return 0 == syscall(PVE::Syscall::unshare, $flags);
@@ -1262,88 +1486,124 @@ sub syncfs($) {
      return 0 == syscall(PVE::Syscall::syncfs, $fileno);
  }
  
+sub fsync($) {
+    my ($fileno) = @_;
+    return 0 == syscall(PVE::Syscall::fsync, $fileno);
+}
+
+sub renameat2($$$$$) {
+    my ($olddirfd, $oldpath, $newdirfd, $newpath, $flags) = @_;
+    return 0 == syscall(PVE::Syscall::renameat2, $olddirfd, $oldpath, $newdirfd, $newpath, $flags);
+}
+
  sub sync_mountpoint {
      my ($path) = @_;
-    sysopen my $fd, $path, O_PATH or die "failed to open $path: $!\n";
-    my $result = syncfs(fileno($fd));
+    sysopen my $fd, $path, O_RDONLY|O_CLOEXEC or die "failed to open $path: $!\n";
+    my $syncfs_err;
+    if (!syncfs(fileno($fd))) {
+       $syncfs_err = "$!";
+    }
      close($fd);
-    return $result;
+    die "syncfs '$path' failed - $syncfs_err\n" if defined $syncfs_err;
+}
+
+my sub check_mail_addr {
+    my ($addr) = @_;
+    die "'$addr' does not look like a valid email address or username\n"
+       if $addr !~ /^$EMAIL_RE$/ && $addr !~ /^$EMAIL_USER_RE$/;
  }
  
  # support sending multi-part mail messages with a text and or a HTML part
  # mailto may be a single email string or an array of receivers
  sub sendmail {
      my ($mailto, $subject, $text, $html, $mailfrom, $author) = @_;
-    my $mail_re = qr/[^-a-zA-Z0-9+._@]/;
  
      $mailto = [ $mailto ] if !ref($mailto);
  
-    foreach (@$mailto) {
-       die "illegal character in mailto address\n"
-           if ($_ =~ $mail_re);
-    }
-
-    my $rcvrtxt = join (', ', @$mailto);
+    check_mail_addr($_) for $mailto->@*;
+    my $to_quoted = [ map { shellquote($_) } $mailto->@* ];
  
      $mailfrom = $mailfrom || "root";
-    die "illegal character in mailfrom address\n"
-       if $mailfrom =~ $mail_re;
+    check_mail_addr($mailfrom);
+    my $from_quoted = shellquote($mailfrom);
+
+    $author = $author // 'Proxmox VE';
+
+    open (my $mail, "|-", "sendmail", "-B", "8BITMIME", "-f", $from_quoted, "--", $to_quoted->@*)
+       or die "unable to open 'sendmail' - $!";
  
-    $author = $author || 'Proxmox VE';
+    my $is_multipart = $text && $html;
+    my $boundary = "----_=_NextPart_001_" . int(time()) . $$; # multipart spec, see rfc 1521
  
-    open (MAIL, "|-", "sendmail", "-B", "8BITMIME", "-f", $mailfrom, @$mailto) ||
-       die "unable to open 'sendmail' - $!";
+    $subject = Encode::encode('MIME-Header', $subject) if $subject =~ /[^[:ascii:]]/;
  
-    # multipart spec see https://www.ietf.org/rfc/rfc1521.txt
-    my $boundary = "----_=_NextPart_001_".int(time).$$;
+    print $mail "MIME-Version: 1.0\n" if $subject =~ /[^[:ascii:]]/ || $is_multipart;
  
-    print MAIL "Content-Type: multipart/alternative;\n";
-    print MAIL "\tboundary=\"$boundary\"\n";
-    print MAIL "MIME-Version: 1.0\n";
+    print $mail "From: $author <$mailfrom>\n";
+    print $mail "To: " . join(', ', @$mailto) ."\n";
+    print $mail "Date: " . time2str('%a, %d %b %Y %H:%M:%S %z', time()) . "\n";
+    print $mail "Subject: $subject\n";
  
-    print MAIL "FROM: $author <$mailfrom>\n";
-    print MAIL "TO: $rcvrtxt\n";
-    print MAIL "SUBJECT: $subject\n";
-    print MAIL "\n";
-    print MAIL "This is a multi-part message in MIME format.\n\n";
-    print MAIL "--$boundary\n";
+    if ($is_multipart) {
+       print $mail "Content-Type: multipart/alternative;\n";
+       print $mail "\tboundary=\"$boundary\"\n";
+       print $mail "\n";
+       print $mail "This is a multi-part message in MIME format.\n\n";
+       print $mail "--$boundary\n";
+    }
  
      if (defined($text)) {
-       print MAIL "Content-Type: text/plain;\n";
-       print MAIL "\tcharset=\"UTF8\"\n";
-       print MAIL "Content-Transfer-Encoding: 8bit\n";
-       print MAIL "\n";
+       print $mail "Content-Type: text/plain;\n";
+       print $mail "\tcharset=\"UTF-8\"\n";
+       print $mail "Content-Transfer-Encoding: 8bit\n";
+       print $mail "\n";
  
         # avoid 'remove extra line breaks' issue (MS Outlook)
         my $fill = '  ';
         $text =~ s/^/$fill/gm;
  
-       print MAIL $text;
+       print $mail $text;
  
-       print MAIL "\n--$boundary\n";
+       print $mail "\n--$boundary\n" if $is_multipart;
      }
  
      if (defined($html)) {
-       print MAIL "Content-Type: text/html;\n";
-       print MAIL "\tcharset=\"UTF8\"\n";
-       print MAIL "Content-Transfer-Encoding: 8bit\n";
-       print MAIL "\n";
+       print $mail "Content-Type: text/html;\n";
+       print $mail "\tcharset=\"UTF-8\"\n";
+       print $mail "Content-Transfer-Encoding: 8bit\n";
+       print $mail "\n";
  
-       print MAIL $html;
+       print $mail $html;
  
-       print MAIL "\n--$boundary--\n";
+       print $mail "\n--$boundary--\n" if $is_multipart;
      }
  
-    close(MAIL);
+    close($mail);
  }
  
+# creates a temporary file that does not shows up on the file system hierarchy.
+#
+# Uses O_TMPFILE if available, which makes it just an anon inode that never shows up in the FS.
+# If O_TMPFILE is not available, which unlikely nowadays (added in 3.11 kernel and all FS relevant
+# for us support it) back to open-create + immediate unlink while still holding the file  handle.
+#
+# TODO: to avoid FS dependent features we could (transparently) switch to memfd_create as backend
  sub tempfile {
      my ($perm, %opts) = @_;
  
      # default permissions are stricter than with file_set_contents
      $perm = 0600 if !defined($perm);
  
-    my $dir = $opts{dir} // '/run';
+    my $dir = $opts{dir};
+    if (!$dir) {
+       if (-d "/run/user/$<") {
+           $dir = "/run/user/$<";
+       } elsif ($< == 0) {
+           $dir = "/run";
+       } else {
+           $dir = "/tmp";
+       }
+    }
      my $mode = $opts{mode} // O_RDWR;
      $mode |= O_EXCL if !$opts{allow_links};
  
@@ -1358,6 +1618,7 @@ sub tempfile {
      return $fh;
  }
  
+# create an (ideally) anon file with the $data as content and return its FD-path and FH
  sub tempfile_contents {
      my ($data, $perm, %opts) = @_;
  
@@ -1409,75 +1670,9 @@ sub mkdirat($$$) {
      return syscall(PVE::Syscall::mkdirat, $dirfd, $name, $mode) == 0;
  }
  
-# NOTE: This calls the dbus main loop and must not be used when another dbus
-# main loop is being used as we need to wait for the JobRemoved signal.
-# Polling the job status instead doesn't work because this doesn't give us the
-# distinction between success and failure.
-#
-# Note that the description is mandatory for security reasons.
-sub enter_systemd_scope {
-    my ($unit, $description, %extra) = @_;
-    die "missing description\n" if !defined($description);
-
-    my $timeout = delete $extra{timeout};
-
-    $unit .= '.scope';
-    my $properties = [ [PIDs => [dbus_uint32($$)]] ];
-
-    foreach my $key (keys %extra) {
-       if ($key eq 'Slice' || $key eq 'KillMode') {
-           push @$properties, [$key, $extra{$key}];
-       } elsif ($key eq 'CPUShares') {
-           push @$properties, [$key, dbus_uint64($extra{$key})];
-       } elsif ($key eq 'CPUQuota') {
-           push @$properties, ['CPUQuotaPerSecUSec',
-                               dbus_uint64($extra{$key} * 10000)];
-       } else {
-           die "Don't know how to encode $key for systemd scope\n";
-       }
-    }
-
-    my $job;
-    my $done = 0;
-
-    my $bus = Net::DBus->system();
-    my $reactor = Net::DBus::Reactor->main();
-
-    my $service = $bus->get_service('org.freedesktop.systemd1');
-    my $if = $service->get_object('/org/freedesktop/systemd1', 'org.freedesktop.systemd1.Manager');
-    # Connect to the JobRemoved signal since we want to wait for it to finish
-    my $sigid;
-    my $timer;
-    my $cleanup = sub {
-       my ($no_shutdown) = @_;
-       $if->disconnect_from_signal('JobRemoved', $sigid) if defined($if);
-       $if = undef;
-       $sigid = undef;
-       $reactor->remove_timeout($timer) if defined($timer);
-       $timer = undef;
-       return if $no_shutdown;
-       $reactor->shutdown();
-    };
-
-    $sigid = $if->connect_to_signal('JobRemoved', sub {
-       my ($id, $removed_job, $signaled_unit, $result) = @_;
-       return if $signaled_unit ne $unit || $removed_job ne $job;
-       $cleanup->(0);
-       die "systemd job failed\n" if $result ne 'done';
-       $done = 1;
-    });
-
-    my $on_timeout = sub {
-       $cleanup->(0);
-       die "systemd job timed out\n";
-    };
-
-    $timer = $reactor->add_timeout($timeout * 1000, Net::DBus::Callback->new(method => $on_timeout))
-       if defined($timeout);
-    $job = $if->StartTransientUnit($unit, 'fail', $properties, []);
-    $reactor->run();
-    $cleanup->(1);
-    die "systemd job never completed\n" if !$done;
+sub fchownat($$$$$) {
+    my ($dirfd, $pathname, $owner, $group, $flags) = @_;
+    return syscall(PVE::Syscall::fchownat, $dirfd, $pathname, $owner, $group, $flags) == 0;
  }
  
  my $salt_starter = time();
@@ -1494,4 +1689,331 @@ sub encrypt_pw {
      return crypt(encode("utf8", $pw), "\$5\$$salt\$");
  }
  
+# intended usage: convert_size($val, "kb" => "gb")
+# we round up to the next integer by default
+# E.g. `convert_size(1023, "b" => "kb")` returns 1
+# use $no_round_up to switch this off, above example would then return 0
+# this is also true for converting down e.g. 0.0005 gb to mb returns 1
+# (0 if $no_round_up is true)
+# allowed formats for value:
+# 1234
+# 1234.
+# 1234.1234
+# .1234
+sub convert_size {
+    my ($value, $from, $to, $no_round_up) = @_;
+
+    my $units = {
+       b  => 0,
+       kb => 1,
+       mb => 2,
+       gb => 3,
+       tb => 4,
+       pb => 5,
+    };
+
+    die "no value given"
+       if !defined($value) || $value eq "";
+
+    $from = lc($from // ''); $to = lc($to // '');
+    die "unknown 'from' and/or 'to' units ($from => $to)"
+       if !defined($units->{$from}) || !defined($units->{$to});
+
+    die "value '$value' is not a valid, positive number"
+       if $value !~ m/^(?:[0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)$/;
+
+    my $shift_amount = ($units->{$from} - $units->{$to}) * 10;
+
+    $value *= 2**$shift_amount;
+    $value++ if !$no_round_up && ($value - int($value)) > 0.0;
+
+    return int($value);
+}
+
+# uninterruptible readline
+# retries on EINTR
+sub readline_nointr {
+    my ($fh) = @_;
+    my $line;
+    while (1) {
+       $line = <$fh>;
+       last if defined($line) || ($! != EINTR);
+    }
+    return $line;
+}
+
+my $host_arch;
+sub get_host_arch {
+    $host_arch = (POSIX::uname())[4] if !$host_arch;
+    return $host_arch;
+}
+
+# Devices are: [ (12 bits minor) (12 bits major) (8 bits minor) ]
+sub dev_t_major($) {
+    my ($dev_t) = @_;
+    return (int($dev_t) & 0xfff00) >> 8;
+}
+
+sub dev_t_minor($) {
+    my ($dev_t) = @_;
+    $dev_t = int($dev_t);
+    return (($dev_t >> 12) & 0xfff00) | ($dev_t & 0xff);
+}
+
+# Given an array of array refs [ \[a b c], \[a b b], \[e b a] ]
+# Returns the intersection of elements as a single array [a b]
+sub array_intersect {
+    my ($arrays) = @_;
+
+    if (!ref($arrays->[0])) {
+       $arrays = [ grep { ref($_) eq 'ARRAY' } @_ ];
+    }
+
+    return [] if scalar(@$arrays) == 0;
+    return $arrays->[0] if scalar(@$arrays) == 1;
+
+    my $array_unique = sub {
+       my %seen = ();
+       return grep { ! $seen{ $_ }++ } @_;
+    };
+
+    # base idea is to get all unique members from the first array, then
+    # check the common elements with the next (uniquely made) one, only keep
+    # those. Repeat for every array and at the end we only have those left
+    # which exist in all arrays
+    my $return_arr = [ $array_unique->(@{$arrays->[0]}) ];
+    for my $i (1 .. $#$arrays) {
+       my %count = ();
+       # $return_arr is already unique, explicit at before the loop, implicit below.
+       foreach my $element (@$return_arr, $array_unique->(@{$arrays->[$i]})) {
+           $count{$element}++;
+       }
+       $return_arr = [];
+       foreach my $element (keys %count) {
+           push @$return_arr, $element if $count{$element} > 1;
+       }
+       last if scalar(@$return_arr) == 0; # empty intersection, early exit
+    }
+
+    return $return_arr;
+}
+
+sub open_tree($$$) {
+    my ($dfd, $pathname, $flags) = @_;
+    return PVE::Syscall::file_handle_result(syscall(
+       &PVE::Syscall::open_tree,
+       $dfd,
+       $pathname,
+       $flags,
+    ));
+}
+
+sub move_mount($$$$$) {
+    my ($from_dirfd, $from_pathname, $to_dirfd, $to_pathname, $flags) = @_;
+    return 0 == syscall(
+       &PVE::Syscall::move_mount,
+       $from_dirfd,
+       $from_pathname,
+       $to_dirfd,
+       $to_pathname,
+       $flags,
+    );
+}
+
+sub fsopen($$) {
+    my ($fsname, $flags) = @_;
+    return PVE::Syscall::file_handle_result(syscall(&PVE::Syscall::fsopen, $fsname, $flags));
+}
+
+sub fsmount($$$) {
+    my ($fd, $flags, $mount_attrs) = @_;
+    return PVE::Syscall::file_handle_result(syscall(
+       &PVE::Syscall::fsmount,
+       $fd,
+       $flags,
+       $mount_attrs,
+    ));
+}
+
+sub fspick($$$) {
+    my ($dirfd, $pathname, $flags) = @_;
+    return PVE::Syscall::file_handle_result(syscall(
+       &PVE::Syscall::fspick,
+       $dirfd,
+       $pathname,
+       $flags,
+    ));
+}
+
+sub fsconfig($$$$$) {
+    my ($fd, $command, $key, $value, $aux) = @_;
+    return 0 == syscall(&PVE::Syscall::fsconfig, $fd, $command, $key, $value, $aux);
+}
+
+# "raw" mount, old api, not for generic use (as it does not invoke any helpers).
+# use for lower level stuff such as bind/remount/... or simple tmpfs mounts
+sub mount($$$$$) {
+    my ($source, $target, $filesystemtype, $mountflags, $data) = @_;
+    return 0 == syscall(
+       &PVE::Syscall::mount,
+       $source,
+       $target,
+       $filesystemtype,
+       $mountflags,
+       $data,
+    );
+}
+
+# size is optional and defaults to 256, note that xattr limits are FS specific and that xattrs can
+# get arbitrary long. pass `0` for $size in array context to get the actual size of a value
+sub getxattr($$;$) {
+    my ($path_or_handle, $name, $size) = @_;
+    $size //= 256;
+    my $buf = pack("x${size}");
+
+    my $xattr_size = -1; # the actual size of the xattr, can be zero
+    if (defined(my $fd = fileno($path_or_handle))) {
+       $xattr_size = syscall(&PVE::Syscall::fgetxattr, $fd, $name, $buf, $size);
+    } else {
+       $xattr_size = syscall(&PVE::Syscall::getxattr, $path_or_handle, $name, $buf, $size);
+    }
+    if ($xattr_size < 0) {
+       return undef;
+    }
+    return wantarray ? ($buf, $xattr_size) : $buf;
+}
+
+# NOTE: can take either a path or an open file handle, i.e., its multiplexing setxattr and fsetxattr
+sub setxattr($$$;$) {
+    my ($path_or_handle, $name, $value, $flags) = @_;
+    my $size = length($value); # NOTE: seems to get correct length also for wide-characters in text..
+
+    if (defined(my $fd = fileno($path_or_handle))) {
+       return 0 == syscall(&PVE::Syscall::fsetxattr, $fd, $name, $value, $size, $flags // 0);
+    } else {
+       return 0 == syscall(&PVE::Syscall::setxattr, $path_or_handle, $name, $value, $size, $flags // 0);
+    }
+}
+
+sub safe_compare {
+    my ($left, $right, $cmp) = @_;
+
+    return 0 if !defined($left) && !defined($right);
+    return -1 if !defined($left);
+    return 1 if !defined($right);
+    return $cmp->($left, $right);
+}
+
+
+# opts is a hash ref with the following known properties
+#  allow_overwrite - if 1, overwriting existing files is allowed, use with care. Default to false
+#  hash_required - if 1, at least one checksum has to be specified otherwise an error will be thrown
+#  http_proxy
+#  https_proxy
+#  verify_certificates - if 0 (false) we tell wget to ignore untrusted TLS certs. Default to true
+#  md5sum|sha(1|224|256|384|512)sum - the respective expected checksum string
+sub download_file_from_url {
+    my ($dest, $url, $opts) = @_;
+
+    my ($checksum_algorithm, $checksum_expected);
+    for ('sha512', 'sha384', 'sha256', 'sha224', 'sha1', 'md5') {
+       if (defined($opts->{"${_}sum"})) {
+           $checksum_algorithm = $_;
+           $checksum_expected = $opts->{"${_}sum"};
+           last;
+       }
+    }
+    die "checksum required but not specified\n" if ($opts->{hash_required} && !$checksum_algorithm);
+
+    print "downloading $url to $dest\n";
+
+    if (-f $dest) {
+       if ($checksum_algorithm) {
+           print "calculating checksum of existing file...";
+           my $checksum_got = get_file_hash($checksum_algorithm, $dest);
+
+           if (lc($checksum_got) eq lc($checksum_expected)) {
+               print "OK, got correct file already, no need to download\n";
+               return;
+           } elsif ($opts->{allow_overwrite}) {
+               print "checksum mismatch: got '$checksum_got' != expect '$checksum_expected', re-download\n";
+           } else {
+               print "\n";  # the front end expects the error to reside at the last line without any noise
+               die "checksum mismatch: got '$checksum_got' != expect '$checksum_expected', aborting\n";
+           }
+       } elsif (!$opts->{allow_overwrite}) {
+           die "refusing to override existing file '$dest'\n";
+       }
+    }
+
+    my $tmpdest = "$dest.tmp.$$";
+    eval {
+       local $SIG{INT} = sub {
+           unlink $tmpdest or warn "could not cleanup temporary file: $!";
+           die "got interrupted by signal\n";
+       };
+
+       { # limit the scope of the ENV change
+           local %ENV;
+           if ($opts->{http_proxy}) {
+               $ENV{http_proxy} = $opts->{http_proxy};
+           }
+           if ($opts->{https_proxy}) {
+               $ENV{https_proxy} = $opts->{https_proxy};
+           }
+
+           my $cmd = ['wget', '--progress=dot:giga', '-O', $tmpdest, $url];
+
+           if (!($opts->{verify_certificates} // 1)) { # default to true
+               push @$cmd, '--no-check-certificate';
+           }
+
+           run_command($cmd, errmsg => "download failed");
+       }
+
+       if ($checksum_algorithm) {
+           print "calculating checksum...";
+
+           my $checksum_got = get_file_hash($checksum_algorithm, $tmpdest);
+
+           if (lc($checksum_got) eq lc($checksum_expected)) {
+               print "OK, checksum verified\n";
+           } else {
+               print "\n";  # the front end expects the error to reside at the last line without any noise
+               die "checksum mismatch: got '$checksum_got' != expect '$checksum_expected'\n";
+           }
+       }
+
+       rename($tmpdest, $dest) or die "unable to rename temporary file: $!\n";
+    };
+    if (my $err = $@) {
+       unlink $tmpdest or warn "could not cleanup temporary file: $!";
+       die $err;
+    }
+
+    print "download of '$url' to '$dest' finished\n";
+}
+
+sub get_file_hash {
+    my ($algorithm, $filename) = @_;
+
+    my $algorithm_map = {
+       'md5' => sub { Digest::MD5->new },
+       'sha1' => sub { Digest::SHA->new(1) },
+       'sha224' => sub { Digest::SHA->new(224) },
+       'sha256' => sub { Digest::SHA->new(256) },
+       'sha384' => sub { Digest::SHA->new(384) },
+       'sha512' => sub { Digest::SHA->new(512) },
+    };
+
+    my $digester = $algorithm_map->{$algorithm}->() or die "unknown algorithm '$algorithm'\n";
+
+    open(my $fh, '<', $filename) or die "unable to open '$filename': $!\n";
+    binmode($fh);
+
+    my $digest = $digester->addfile($fh)->hexdigest;
+
+    return lc($digest);
+}
+
  1;