From 07f7275e5886402e5b42bcfc5baa391025673a05 Mon Sep 17 00:00:00 2001 From: Stoiko Ivanov Date: Thu, 24 Nov 2022 13:21:06 +0100 Subject: [PATCH] quarantine: handle utf8 data use try_decode_utf8 for sender/receiver of the smtp dialog and mail headers since they're either ASCII (not SMTPUTF8) or UTF-8 (with SMTPUTF8) encoded change the mail regex for wl/bl to basic email/domain syntax without the restriction of ascii only. (whitespace and backslashes are forbidden, but they shouldn't normally occur in email addresses and domains) Signed-off-by: Stoiko Ivanov [ D: Added Commit message ] Signed-off-by: Dominik Csapak --- src/PMG/API2/Quarantine.pm | 10 +++++----- src/PMG/HTMLMail.pm | 7 ++++--- src/PMG/Quarantine.pm | 13 +++++++------ src/PMG/RuleDB/Spam.pm | 12 ++++++------ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm index 71aaf0c..0260f03 100644 --- a/src/PMG/API2/Quarantine.pm +++ b/src/PMG/API2/Quarantine.pm @@ -141,8 +141,8 @@ my $parse_header_info = sub { my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender'))); $res->{sender} = $sender if $sender && ($sender ne $res->{from}); - $res->{envelope_sender} = $ref->{sender}; - $res->{receiver} = $ref->{receiver} // $ref->{pmail}; + $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender}); + $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail}); $res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . 
$ref->{ticketid}; $res->{time} = $ref->{time}; $res->{bytes} = $ref->{bytes}; @@ -452,7 +452,7 @@ __PACKAGE__->register_method ({ $sth->execute($quar_type_map->{$quar_type}); while (my $ref = $sth->fetchrow_hashref()) { - push @$res, { mail => $ref->{pmail} }; + push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) }; } return $res; @@ -547,7 +547,7 @@ __PACKAGE__->register_method ({ } while (my $ref = $sth->fetchrow_hashref()) { - push @$res, { mail => $ref->{pmail} }; + push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) }; } return $res; @@ -584,7 +584,7 @@ my $quarantine_api = sub { } if ($check_pmail || $role eq 'quser') { - $sth->execute($pmail); + $sth->execute(encode('UTF-8', $pmail)); } else { $sth->execute(); } diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm index 87f5c40..207c52c 100644 --- a/src/PMG/HTMLMail.pm +++ b/src/PMG/HTMLMail.pm @@ -192,9 +192,10 @@ sub read_raw_email { # read header my $header; while (defined(my $line = <$fh>)) { - $raw_header .= $line; - chomp $line; - push @$header, $line; + my $decoded_line = PMG::Utils::try_decode_utf8($line); + $raw_header .= $decoded_line; + chomp $decoded_line; + push @$header, $decoded_line; last if $line =~ m/^\s*$/; } diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm index 77af8cc..aa6b948 100644 --- a/src/PMG/Quarantine.pm +++ b/src/PMG/Quarantine.pm @@ -3,6 +3,7 @@ package PMG::Quarantine; use strict; use warnings; use Net::SMTP; +use Encode qw(encode); use PVE::SafeSyslog; use PVE::Tools; @@ -16,7 +17,7 @@ sub add_to_blackwhite { my $name = $listname eq 'BL' ? 'BL' : 'WL'; my $oname = $listname eq 'BL' ? 
'WL' : 'BL'; - my $qu = $dbh->quote ($username); + my $qu = $dbh->quote (encode('UTF-8', $username)); my $sth = $dbh->prepare( "SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')"); @@ -25,13 +26,13 @@ sub add_to_blackwhite { my $list = { 'WL' => {}, 'BL' => {} }; while (my $ref = $sth->fetchrow_hashref()) { - my $data = $ref->{data}; + my $data = PMG::Utils::try_decode_utf8($ref->{data}); $data =~ s/[,;]/ /g; my @alist = split('\s+', $data); my $tmp = {}; foreach my $a (@alist) { - if ($a =~ m/^[[:ascii:]]+$/) { + if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) { $tmp->{$a} = 1; } } @@ -50,7 +51,7 @@ sub add_to_blackwhite { if ($delete) { delete($list->{$name}->{$v}); } else { - if ($v =~ m/[[:^ascii:]]/) { + if ($v =~ m/[\s\\]/) { die "email address '$v' contains invalid characters\n"; } $list->{$name}->{$v} = 1; @@ -58,8 +59,8 @@ sub add_to_blackwhite { } } - my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || ''); - my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || ''); + my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || ''); + my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || ''); if (!$delete) { my $maxlen = 200000; diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm index 99056a3..bc1d422 100644 --- a/src/PMG/RuleDB/Spam.pm +++ b/src/PMG/RuleDB/Spam.pm @@ -94,7 +94,7 @@ sub parse_addrlist { my $regex = $addr; # SA like checks $regex =~ s/[\000\\\(]/_/gs; # is this really necessasry ? - $regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape possible metachars + $regex =~ s/([^\*\?_\w])/\\$1/g; # escape possible metachars $regex =~ tr/?/./; # replace "?" with "." 
$regex =~ s/\*+/\.\*/g; # replace "*" with ".*" @@ -149,13 +149,13 @@ sub get_blackwhite { $sth->execute(); while (my $ref = $sth->fetchrow_hashref()) { - my $pmail = lc ($ref->{pmail}); + my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail})); if ($ref->{name} eq 'WL') { $target_info->{$pmail}->{whitelist} = - parse_addrlist($ref->{data}); + parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data})); } elsif ($ref->{name} eq 'BL') { $target_info->{$pmail}->{blacklist} = - parse_addrlist($ref->{data}); + parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data})); } } @@ -205,7 +205,7 @@ sub what_match_targets { ($list = $queue->{blackwhite}->{$pmail}->{whitelist}) && check_addrlist($list, $queue->{all_from_addrs})) { syslog('info', "%s: sender in user (%s) whitelist", - $queue->{logid}, $pmail); + $queue->{logid}, encode('UTF-8', $pmail)); } else { $target_info->{$t}->{marks} = []; # never add additional marks here $target_info->{$t}->{spaminfo} = $info; @@ -234,7 +234,7 @@ sub what_match_targets { $target_info->{$t}->{marks} = []; $target_info->{$t}->{spaminfo} = $info; syslog ('info', "%s: sender in user (%s) blacklist", - $queue->{logid}, $pmail); + $queue->{logid}, encode('UTF-8',$pmail)); } } } -- 2.39.5