safe_read_from: bump default size limit to 1 MiB to match pmxcfs

[pve-common.git] / src / PVE / Tools.pm
diff --git a/src/PVE/Tools.pm b/src/PVE/Tools.pm

index 5d53127445885f5350b51f72014d6a3c9ecdc1cd..a1571ea3d9fa8c05746fea45ad7c525b197d5c6d 100644 (file)
--- a/src/PVE/Tools.pm
+++ b/src/PVE/Tools.pm
@@ -2,29 +2,32 @@ package PVE::Tools;
  
  use strict;
  use warnings;
-use POSIX qw(EINTR EEXIST EOPNOTSUPP);
-use IO::Socket::IP;
-use Socket qw(AF_INET AF_INET6 AI_ALL AI_V4MAPPED AI_CANONNAME SOCK_DGRAM
-             IPPROTO_TCP);
-use IO::Select;
+
+use Date::Format qw(time2str);
+use Digest::MD5;
+use Digest::SHA;
+use Encode;
+use Fcntl qw(:DEFAULT :flock);
  use File::Basename;
  use File::Path qw(make_path);
  use Filesys::Df (); # don't overwrite our df()
-use IO::Pipe;
-use IO::File;
  use IO::Dir;
+use IO::File;
  use IO::Handle;
+use IO::Pipe;
+use IO::Select;
+use IO::Socket::IP;
  use IPC::Open3;
-use Fcntl qw(:DEFAULT :flock);
-use base 'Exporter';
-use URI::Escape;
-use Encode;
-use Digest::SHA;
  use JSON;
-use Text::ParseWords;
+use POSIX qw(EINTR EEXIST EOPNOTSUPP);
+use Scalar::Util 'weaken';
+use Socket qw(AF_INET AF_INET6 AI_ALL AI_V4MAPPED AI_CANONNAME SOCK_DGRAM IPPROTO_TCP);
  use String::ShellQuote;
+use Text::ParseWords;
  use Time::HiRes qw(usleep gettimeofday tv_interval alarm);
-use Scalar::Util 'weaken';
+use URI::Escape;
+use base 'Exporter';
+
  use PVE::Syscall;
  
  # avoid warning when parsing long hex values with hex()
@@ -46,6 +49,7 @@ template_replace
  safe_print
  trim
  extract_param
+extract_sensitive_params
  file_copy
  get_host_arch
  O_PATH
@@ -84,6 +88,9 @@ our $IPV6RE = "(?:" .
  
  our $IPRE = "(?:$IPV4RE|$IPV6RE)";
  
+our $EMAIL_USER_RE = qr/[\w\+\-\~]+(\.[\w\+\-\~]+)*/;
+our $EMAIL_RE = qr/$EMAIL_USER_RE@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)*/;
+
  use constant {CLONE_NEWNS   => 0x00020000,
                CLONE_NEWUTS  => 0x04000000,
                CLONE_NEWIPC  => 0x08000000,
@@ -92,11 +99,17 @@ use constant {CLONE_NEWNS   => 0x00020000,
                CLONE_NEWNET  => 0x40000000};
  
  use constant {O_PATH    => 0x00200000,
+              O_CLOEXEC => 0x00080000,
                O_TMPFILE => 0x00410000}; # This includes O_DIRECTORY
  
  use constant {AT_EMPTY_PATH => 0x1000,
                AT_FDCWD => -100};
  
+# from <linux/fs.h>
+use constant {RENAME_NOREPLACE => (1 << 0),
+              RENAME_EXCHANGE  => (1 << 1),
+              RENAME_WHITEOUT  => (1 << 2)};
+
  sub run_with_timeout {
      my ($timeout, $code, @param) = @_;
  
@@ -289,7 +302,7 @@ sub safe_read_from {
      my ($fh, $max, $oneline, $filename) = @_;
  
      # pmxcfs file size limit
-    $max = 512*1024 if !$max;
+    $max = 1024 * 1024 if !$max;
  
      my $subject = defined($filename) ? "file '$filename'" : 'input';
  
@@ -441,13 +454,12 @@ sub run_command {
  
             $pid = open3($writer, $reader, $error, @$cmd) || die $!;
  
-           # if we pipe fron STDIN, open3 closes STDIN, so we we
-           # a perl warning "Filehandle STDIN reopened as GENXYZ .. "
-           # as soon as we open a new file.
+           # if we pipe fron STDIN, open3 closes STDIN, so we get a perl warning like
+           # "Filehandle STDIN reopened as GENXYZ .. " as soon as we open a new file.
             # to avoid that we open /dev/null
             if (!ref($writer) && !defined(fileno(STDIN))) {
                 POSIX::close(0);
-               open(STDIN, "</dev/null");
+               open(STDIN, '<', '/dev/null');
             }
         };
  
@@ -472,7 +484,7 @@ sub run_command {
             close $writer;
         }
  
-       my $select = new IO::Select;
+       my $select = IO::Select->new();
         $select->add($reader) if ref($reader);
         $select->add($error);
  
@@ -497,12 +509,13 @@ sub run_command {
                 if ($h eq $reader) {
                     if ($outfunc || $logfunc) {
                         eval {
-                           $outlog .= $buf;
-                           while ($outlog =~ s/^([^\010\r\n]*)(\r|\n|(\010)+|\r\n)//s) {
-                               my $line = $1;
+                           while ($buf =~ s/^([^\010\r\n]*)(?:\n|(?:\010)+|\r\n?)//) {
+                               my $line = $outlog . $1;
+                               $outlog = '';
                                 &$outfunc($line) if $outfunc;
                                 &$logfunc($line) if $logfunc;
                             }
+                           $outlog .= $buf;
                         };
                         my $err = $@;
                         if ($err) {
@@ -517,12 +530,13 @@ sub run_command {
                 } elsif ($h eq $error) {
                     if ($errfunc || $logfunc) {
                         eval {
-                           $errlog .= $buf;
-                           while ($errlog =~ s/^([^\010\r\n]*)(\r|\n|(\010)+|\r\n)//s) {
-                               my $line = $1;
+                           while ($buf =~ s/^([^\010\r\n]*)(?:\n|(?:\010)+|\r\n?)//) {
+                               my $line = $errlog . $1;
+                               $errlog = '';
                                 &$errfunc($line) if $errfunc;
                                 &$logfunc($line) if $logfunc;
                             }
+                           $errlog .= $buf;
                         };
                         my $err = $@;
                         if ($err) {
@@ -802,6 +816,28 @@ sub extract_param {
      return $res;
  }
  
+# For extracting sensitive keys (e.g. password), to avoid writing them to www-data owned configs
+sub extract_sensitive_params :prototype($$$) {
+    my ($param, $sensitive_list, $delete_list) = @_;
+
+    my %delete = map { $_ => 1 } ($delete_list || [])->@*;
+
+    my $sensitive = {};
+    for my $opt (@$sensitive_list) {
+       # handle deletions as explicitly setting `undef`, so subs which only have $param but not
+       # $delete_list available can recognize them. Afterwards new values  may override.
+       if (exists($delete{$opt})) {
+           $sensitive->{$opt} = undef;
+       }
+
+       if (defined(my $value = extract_param($param, $opt))) {
+           $sensitive->{$opt} = $value;
+       }
+    }
+
+    return $sensitive;
+}
+
  # Note: we use this to wait until vncterm/spiceterm is ready
  sub wait_for_vnc_port {
      my ($port, $family, $timeout) = @_;
@@ -1125,6 +1161,8 @@ sub upid_read_status {
             return 'OK';
         } elsif ($line =~ m/^TASK ERROR: (.+)$/) {
             return $1;
+       } elsif ($line =~ m/^TASK (WARNINGS: \d+)$/) {
+           return $1;
         } else {
             return "unexpected status";
         }
@@ -1132,6 +1170,31 @@ sub upid_read_status {
      return "unable to read tail (got $br bytes)";
  }
  
+# Check if the status returned by upid_read_status is an error status.
+# If the status could not be parsed it's also treated as an error.
+sub upid_status_is_error {
+    my ($status) = @_;
+
+    return !($status eq 'OK' || $status =~ m/^WARNINGS: \d+$/);
+}
+
+# takes the parsed status and returns the type, either ok, warning, error or unknown
+sub upid_normalize_status_type {
+    my ($status) = @_;
+
+    if (!$status) {
+       return 'unknown';
+    } elsif ($status eq 'OK') {
+       return 'ok';
+    } elsif ($status =~ m/^WARNINGS: \d+$/) {
+       return 'warning';
+    } elsif ($status eq 'unexpected status') {
+       return 'unknown';
+    } else {
+       return 'error';
+    }
+}
+
  # useful functions to store comments in config files
  sub encode_text {
      my ($text) = @_;
@@ -1357,8 +1420,10 @@ sub unpack_sockaddr_in46 {
  
  sub getaddrinfo_all {
      my ($hostname, @opts) = @_;
-    my %hints = ( flags => AI_V4MAPPED | AI_ALL,
-                  @opts );
+    my %hints = (
+       flags => AI_V4MAPPED | AI_ALL,
+       @opts,
+    );
      my ($err, @res) = Socket::getaddrinfo($hostname, '0', \%hints);
      die "failed to get address info for: $hostname: $err\n" if $err;
      return @res;
@@ -1426,88 +1491,119 @@ sub fsync($) {
      return 0 == syscall(PVE::Syscall::fsync, $fileno);
  }
  
+sub renameat2($$$$$) {
+    my ($olddirfd, $oldpath, $newdirfd, $newpath, $flags) = @_;
+    return 0 == syscall(PVE::Syscall::renameat2, $olddirfd, $oldpath, $newdirfd, $newpath, $flags);
+}
+
  sub sync_mountpoint {
      my ($path) = @_;
-    sysopen my $fd, $path, O_PATH or die "failed to open $path: $!\n";
-    my $result = syncfs(fileno($fd));
+    sysopen my $fd, $path, O_RDONLY|O_CLOEXEC or die "failed to open $path: $!\n";
+    my $syncfs_err;
+    if (!syncfs(fileno($fd))) {
+       $syncfs_err = "$!";
+    }
      close($fd);
-    return $result;
+    die "syncfs '$path' failed - $syncfs_err\n" if defined $syncfs_err;
+}
+
+my sub check_mail_addr {
+    my ($addr) = @_;
+    die "'$addr' does not look like a valid email address or username\n"
+       if $addr !~ /^$EMAIL_RE$/ && $addr !~ /^$EMAIL_USER_RE$/;
  }
  
  # support sending multi-part mail messages with a text and or a HTML part
  # mailto may be a single email string or an array of receivers
  sub sendmail {
      my ($mailto, $subject, $text, $html, $mailfrom, $author) = @_;
-    my $mail_re = qr/[^-a-zA-Z0-9+._@]/;
  
      $mailto = [ $mailto ] if !ref($mailto);
  
-    foreach (@$mailto) {
-       die "illegal character in mailto address\n"
-           if ($_ =~ $mail_re);
-    }
-
-    my $rcvrtxt = join (', ', @$mailto);
+    check_mail_addr($_) for $mailto->@*;
+    my $to_quoted = [ map { shellquote($_) } $mailto->@* ];
  
      $mailfrom = $mailfrom || "root";
-    die "illegal character in mailfrom address\n"
-       if $mailfrom =~ $mail_re;
+    check_mail_addr($mailfrom);
+    my $from_quoted = shellquote($mailfrom);
  
-    $author = $author || 'Proxmox VE';
+    $author = $author // 'Proxmox VE';
  
-    open (MAIL, "|-", "sendmail", "-B", "8BITMIME", "-f", $mailfrom, "--", @$mailto) ||
-       die "unable to open 'sendmail' - $!";
+    open (my $mail, "|-", "sendmail", "-B", "8BITMIME", "-f", $from_quoted, "--", $to_quoted->@*)
+       or die "unable to open 'sendmail' - $!";
  
-    # multipart spec see https://www.ietf.org/rfc/rfc1521.txt
-    my $boundary = "----_=_NextPart_001_".int(time).$$;
+    my $is_multipart = $text && $html;
+    my $boundary = "----_=_NextPart_001_" . int(time()) . $$; # multipart spec, see rfc 1521
  
-    print MAIL "Content-Type: multipart/alternative;\n";
-    print MAIL "\tboundary=\"$boundary\"\n";
-    print MAIL "MIME-Version: 1.0\n";
+    $subject = Encode::encode('MIME-Header', $subject) if $subject =~ /[^[:ascii:]]/;
  
-    print MAIL "FROM: $author <$mailfrom>\n";
-    print MAIL "TO: $rcvrtxt\n";
-    print MAIL "SUBJECT: $subject\n";
-    print MAIL "\n";
-    print MAIL "This is a multi-part message in MIME format.\n\n";
-    print MAIL "--$boundary\n";
+    print $mail "MIME-Version: 1.0\n" if $subject =~ /[^[:ascii:]]/ || $is_multipart;
+
+    print $mail "From: $author <$mailfrom>\n";
+    print $mail "To: " . join(', ', @$mailto) ."\n";
+    print $mail "Date: " . time2str('%a, %d %b %Y %H:%M:%S %z', time()) . "\n";
+    print $mail "Subject: $subject\n";
+
+    if ($is_multipart) {
+       print $mail "Content-Type: multipart/alternative;\n";
+       print $mail "\tboundary=\"$boundary\"\n";
+       print $mail "\n";
+       print $mail "This is a multi-part message in MIME format.\n\n";
+       print $mail "--$boundary\n";
+    }
  
      if (defined($text)) {
-       print MAIL "Content-Type: text/plain;\n";
-       print MAIL "\tcharset=\"UTF8\"\n";
-       print MAIL "Content-Transfer-Encoding: 8bit\n";
-       print MAIL "\n";
+       print $mail "Content-Type: text/plain;\n";
+       print $mail "\tcharset=\"UTF-8\"\n";
+       print $mail "Content-Transfer-Encoding: 8bit\n";
+       print $mail "\n";
  
         # avoid 'remove extra line breaks' issue (MS Outlook)
         my $fill = '  ';
         $text =~ s/^/$fill/gm;
  
-       print MAIL $text;
+       print $mail $text;
  
-       print MAIL "\n--$boundary\n";
+       print $mail "\n--$boundary\n" if $is_multipart;
      }
  
      if (defined($html)) {
-       print MAIL "Content-Type: text/html;\n";
-       print MAIL "\tcharset=\"UTF8\"\n";
-       print MAIL "Content-Transfer-Encoding: 8bit\n";
-       print MAIL "\n";
+       print $mail "Content-Type: text/html;\n";
+       print $mail "\tcharset=\"UTF-8\"\n";
+       print $mail "Content-Transfer-Encoding: 8bit\n";
+       print $mail "\n";
  
-       print MAIL $html;
+       print $mail $html;
  
-       print MAIL "\n--$boundary--\n";
+       print $mail "\n--$boundary--\n" if $is_multipart;
      }
  
-    close(MAIL);
+    close($mail);
  }
  
+# creates a temporary file that does not shows up on the file system hierarchy.
+#
+# Uses O_TMPFILE if available, which makes it just an anon inode that never shows up in the FS.
+# If O_TMPFILE is not available, which unlikely nowadays (added in 3.11 kernel and all FS relevant
+# for us support it) back to open-create + immediate unlink while still holding the file  handle.
+#
+# TODO: to avoid FS dependent features we could (transparently) switch to memfd_create as backend
  sub tempfile {
      my ($perm, %opts) = @_;
  
      # default permissions are stricter than with file_set_contents
      $perm = 0600 if !defined($perm);
  
-    my $dir = $opts{dir} // '/run';
+    my $dir = $opts{dir};
+    if (!$dir) {
+       if (-d "/run/user/$<") {
+           $dir = "/run/user/$<";
+       } elsif ($< == 0) {
+           $dir = "/run";
+       } else {
+           $dir = "/tmp";
+       }
+    }
      my $mode = $opts{mode} // O_RDWR;
      $mode |= O_EXCL if !$opts{allow_links};
  
@@ -1522,6 +1618,7 @@ sub tempfile {
      return $fh;
  }
  
+# create an (ideally) anon file with the $data as content and return its FD-path and FH
  sub tempfile_contents {
      my ($data, $perm, %opts) = @_;
  
@@ -1767,6 +1864,37 @@ sub mount($$$$$) {
      );
  }
  
+# size is optional and defaults to 256, note that xattr limits are FS specific and that xattrs can
+# get arbitrary long. pass `0` for $size in array context to get the actual size of a value
+sub getxattr($$;$) {
+    my ($path_or_handle, $name, $size) = @_;
+    $size //= 256;
+    my $buf = pack("x${size}");
+
+    my $xattr_size = -1; # the actual size of the xattr, can be zero
+    if (defined(my $fd = fileno($path_or_handle))) {
+       $xattr_size = syscall(&PVE::Syscall::fgetxattr, $fd, $name, $buf, $size);
+    } else {
+       $xattr_size = syscall(&PVE::Syscall::getxattr, $path_or_handle, $name, $buf, $size);
+    }
+    if ($xattr_size < 0) {
+       return undef;
+    }
+    return wantarray ? ($buf, $xattr_size) : $buf;
+}
+
+# NOTE: can take either a path or an open file handle, i.e., its multiplexing setxattr and fsetxattr
+sub setxattr($$$;$) {
+    my ($path_or_handle, $name, $value, $flags) = @_;
+    my $size = length($value); # NOTE: seems to get correct length also for wide-characters in text..
+
+    if (defined(my $fd = fileno($path_or_handle))) {
+       return 0 == syscall(&PVE::Syscall::fsetxattr, $fd, $name, $value, $size, $flags // 0);
+    } else {
+       return 0 == syscall(&PVE::Syscall::setxattr, $path_or_handle, $name, $value, $size, $flags // 0);
+    }
+}
+
  sub safe_compare {
      my ($left, $right, $cmp) = @_;
  
@@ -1776,4 +1904,116 @@ sub safe_compare {
      return $cmp->($left, $right);
  }
  
+
+# opts is a hash ref with the following known properties
+#  allow_overwrite - if 1, overwriting existing files is allowed, use with care. Default to false
+#  hash_required - if 1, at least one checksum has to be specified otherwise an error will be thrown
+#  http_proxy
+#  https_proxy
+#  verify_certificates - if 0 (false) we tell wget to ignore untrusted TLS certs. Default to true
+#  md5sum|sha(1|224|256|384|512)sum - the respective expected checksum string
+sub download_file_from_url {
+    my ($dest, $url, $opts) = @_;
+
+    my ($checksum_algorithm, $checksum_expected);
+    for ('sha512', 'sha384', 'sha256', 'sha224', 'sha1', 'md5') {
+       if (defined($opts->{"${_}sum"})) {
+           $checksum_algorithm = $_;
+           $checksum_expected = $opts->{"${_}sum"};
+           last;
+       }
+    }
+    die "checksum required but not specified\n" if ($opts->{hash_required} && !$checksum_algorithm);
+
+    print "downloading $url to $dest\n";
+
+    if (-f $dest) {
+       if ($checksum_algorithm) {
+           print "calculating checksum of existing file...";
+           my $checksum_got = get_file_hash($checksum_algorithm, $dest);
+
+           if (lc($checksum_got) eq lc($checksum_expected)) {
+               print "OK, got correct file already, no need to download\n";
+               return;
+           } elsif ($opts->{allow_overwrite}) {
+               print "checksum mismatch: got '$checksum_got' != expect '$checksum_expected', re-download\n";
+           } else {
+               print "\n";  # the front end expects the error to reside at the last line without any noise
+               die "checksum mismatch: got '$checksum_got' != expect '$checksum_expected', aborting\n";
+           }
+       } elsif (!$opts->{allow_overwrite}) {
+           die "refusing to override existing file '$dest'\n";
+       }
+    }
+
+    my $tmpdest = "$dest.tmp.$$";
+    eval {
+       local $SIG{INT} = sub {
+           unlink $tmpdest or warn "could not cleanup temporary file: $!";
+           die "got interrupted by signal\n";
+       };
+
+       { # limit the scope of the ENV change
+           local %ENV;
+           if ($opts->{http_proxy}) {
+               $ENV{http_proxy} = $opts->{http_proxy};
+           }
+           if ($opts->{https_proxy}) {
+               $ENV{https_proxy} = $opts->{https_proxy};
+           }
+
+           my $cmd = ['wget', '--progress=dot:giga', '-O', $tmpdest, $url];
+
+           if (!($opts->{verify_certificates} // 1)) { # default to true
+               push @$cmd, '--no-check-certificate';
+           }
+
+           run_command($cmd, errmsg => "download failed");
+       }
+
+       if ($checksum_algorithm) {
+           print "calculating checksum...";
+
+           my $checksum_got = get_file_hash($checksum_algorithm, $tmpdest);
+
+           if (lc($checksum_got) eq lc($checksum_expected)) {
+               print "OK, checksum verified\n";
+           } else {
+               print "\n";  # the front end expects the error to reside at the last line without any noise
+               die "checksum mismatch: got '$checksum_got' != expect '$checksum_expected'\n";
+           }
+       }
+
+       rename($tmpdest, $dest) or die "unable to rename temporary file: $!\n";
+    };
+    if (my $err = $@) {
+       unlink $tmpdest or warn "could not cleanup temporary file: $!";
+       die $err;
+    }
+
+    print "download of '$url' to '$dest' finished\n";
+}
+
+sub get_file_hash {
+    my ($algorithm, $filename) = @_;
+
+    my $algorithm_map = {
+       'md5' => sub { Digest::MD5->new },
+       'sha1' => sub { Digest::SHA->new(1) },
+       'sha224' => sub { Digest::SHA->new(224) },
+       'sha256' => sub { Digest::SHA->new(256) },
+       'sha384' => sub { Digest::SHA->new(384) },
+       'sha512' => sub { Digest::SHA->new(512) },
+    };
+
+    my $digester = $algorithm_map->{$algorithm}->() or die "unknown algorithm '$algorithm'\n";
+
+    open(my $fh, '<', $filename) or die "unable to open '$filename': $!\n";
+    binmode($fh);
+
+    my $digest = $digester->addfile($fh)->hexdigest;
+
+    return lc($digest);
+}
+
  1;