# see http://web.archive.org/web/20110726052341/http://feedparser.org/docs/html-sanitization.html
- my @allow = qw(a abbr acronym address area b big blockquote br button caption center cite code col colgroup dd del dfn dir div dl dt em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label legend li map menu ol optgroup option p pre q s samp select small span strike strong sub sup title table tbody td textarea tfoot th thead tr tt u ul var html body);
+ my @allow = qw(a abbr acronym address area b big blockquote br button caption center cite code col colgroup dd del dfn dir div dl dt em fieldset font form h1 h2 h3 h4 h5 h6 head hr i img input ins kbd label legend li map menu ol optgroup option p pre q s samp select small span style strike strong sub sup title table tbody td textarea tfoot th thead tr tt u ul var html body);
+
+ my @rules = ( script => 0 );
- my @rules = (
- script => 0,
- img => {
- alt => 1, # alt attribute allowed
- width => 1,
- height => 1,
- border => 1,
- src => $viewimages ? qr{^(?!(?:java)?script)}i : 0,
- '*' => 0, # deny all other attributes
- },
- );
- #
my @default = (
0 => # default rule, deny all tags
{
alt => 1,
axis => 1,
border => 1,
+ bgcolor => 1,
cellpadding => 1,
cellspacing => 1,
char => 1,
shape => 1,
size => 1,
span => 1,
+ src => $viewimages ? qr{^(?!(?:java)?script)}i : 0,
start => 1,
+ style => 1,
summary => 1,
tabindex => 1,
target => 1,
rules => \@rules,
default => \@default,
comment => 0,
- script => 0,
- style => 0,
process => 0,
);
+ $scrubber->style(1);
+
return $scrubber;
}
sub read_raw_email {
- my ($path) = @_;
+ my ($path, $maxbytes) = @_;
open (my $fh, '<', $path) || die "unable to open '$path' - $!\n";
my $cs = $head->mime_attr("content-type.charset");
+ my $bytes = 0;
+
while (defined(my $line = <$fh>)) {
+ $bytes += length ($line);
if ($cs) {
$data .= decode($cs, $line);
} else {
$data .= $line;
}
+ if (defined($maxbytes) && ($bytes >= $maxbytes)) {
+ $data .= "\n... mail truncated (> $maxbytes bytes)\n";
+ last;
+ }
}
close($fh);
};
my $find_images = sub {
- my ($entity) = @_;
-
- my $res = {};
+ my ($cid_hash, $entity) = @_;
foreach my $part ($entity->parts) {
if (my $rawcid = $part->head->get('Content-Id')) {
my $ctype = $part->head->mime_attr('Content-type') // '';
if ($ctype =~ m!^image/!) {
if (defined(my $raw = $read_part->($part))) {
- $res->{$cid} = "data:$ctype;base64," . encode_base64($raw, '');
+ $cid_hash->{$cid} = "data:$ctype;base64," . encode_base64($raw, '');
}
}
}
}
}
-
- return $res;
};
sub entity_to_html {
}
# get related/embedded images as data uris
- my $cid_hash = $find_images->($entity);
+ $find_images->($cid_hash, $entity);
my $alt = $multi_part || $html_part || $text_part;
- return entity_to_html ($alt, $cid_hash, $viewimages, $allowhref) if $alt;
+ return entity_to_html($alt, $cid_hash, $viewimages, $allowhref) if $alt;
}
return undef;