]>
git.proxmox.com Git - pmg-api.git/blob - PMG/HTMLMail.pm
12 use HTML
:: TreeBuilder
;
16 my ( $tree, $cid_hash ) = @_ ;
20 my ( $tag, $node, $start, $depth );
26 $tag = $node ->{ '_tag' };
28 # try to open a new window when user activates a anchor
29 $node ->{ target
} = '_blank' if $tag eq 'a' ;
32 if ( $node ->{ src
} =~ m/^cid:(\S+)$/ ) {
33 if ( my $datauri = $cid_hash ->{ $1 }) {
34 $node ->{ src
} = $datauri ;
39 if ( $start ) { # on the way in
40 push ( @html, $node -> starttag );
43 push ( @html, $node -> endtag );
47 $node = encode_entities
( $node )
48 # That does magic things if $entities is undef.
49 unless $HTML :: Tagset
:: isCDATA_Parent
{ $_ [ 3 ]{ '_tag' } };
50 # To keep from amp-escaping children of script et al.
51 # That doesn't deal with descendants; but then, CDATA
52 # parents shouldn't /have/ descendants other than a
53 # text children (or comments?)
60 return join ( '' , @html, " \n " );
64 my ( $viewimages, $allowhref ) = @_ ;
66 # see http://web.archive.org/web/20110726052341/http://feedparser.org/docs/html-sanitization.html
68 my @allow = qw(a abbr acronym address area b big blockquote br button caption center cite code col colgroup dd del dfn dir div dl dt em fieldset font form h1 h2 h3 h4 h5 h6 head hr i img input ins kbd label legend li map menu ol optgroup option p pre q s samp select small span style strike strong sub sup title table tbody td textarea tfoot th thead tr tt u ul var html body) ;
70 my @rules = ( script
=> 0 );
73 0 => # default rule, deny all tags
75 '*' => 0 , # default rule, deny all attributes
78 'accept-charset' => 1 ,
106 # only allow http:// and https:// hrefs
107 'href' => $allowhref ?
qr{^https?://[^/]+/}i : 0 ,
135 src
=> $viewimages ?
qr{^(?!(?:java)?script)}i : 0 ,
151 my $scrubber = HTML
:: Scrubber-
> new (
154 default => \
@default,
165 my ( $path, $maxbytes ) = @_ ;
167 open ( my $fh, '<' , $path ) || die "unable to open ' $path ' - $!\n " ;
174 while ( defined ( my $line = < $fh >)) {
175 $raw_header .= $line ;
177 push @$header, $line ;
178 last if $line =~ m/^\s*$/ ;
181 my $head = MIME
:: Head-
> new ( $header );
183 my $cs = $head -> mime_attr ( "content-type.charset" );
187 while ( defined ( my $line = < $fh >)) {
188 $bytes += length ( $line );
190 $data .= decode
( $cs, $line );
194 if ( defined ( $maxbytes ) && ( $bytes >= $maxbytes )) {
195 $data .= " \n ... mail truncated (> $maxbytes bytes) \n " ;
202 return ( $raw_header, $data );
205 my $read_part = sub {
208 my $io = $part -> open ( "r" );
209 return undef if ! $io ;
212 while ( defined ( my $line = $io -> getline )) { $raw .= $line ; }
218 my $find_images = sub {
219 my ( $cid_hash, $entity ) = @_ ;
221 foreach my $part ( $entity -> parts ) {
222 if ( my $rawcid = $part -> head -> get ( 'Content-Id' )) {
223 if ( $rawcid =~ m/^\s*<(\S+)>\s*$/ ) {
225 my $ctype = $part -> head -> mime_attr ( 'Content-type' ) // '' ;
226 if ( $ctype =~ m!^image/! ) {
227 if ( defined ( my $raw = $read_part ->( $part ))) {
228 $cid_hash ->{ $cid } = "data: $ctype ;base64," . encode_base64
( $raw, '' );
237 my ( $entity, $cid_hash, $viewimages, $allowhref ) = @_ ;
239 my $mime_type = lc ( $entity -> mime_type );;
241 if ( $mime_type eq 'text/plain' ) {
242 my $raw = $read_part ->( $entity ) // '' ;
243 my $html = "<pre> \n " ;
245 if ( defined ( my $cs = $entity -> head -> mime_attr ( "content-type.charset" ))) {
246 $html .= PMG
:: Utils
:: decode_to_html
( $cs, $raw );
248 $html .= encode_entities
( $raw );
255 } elsif ( $mime_type eq 'text/html' ) {
256 my $raw = $read_part ->( $entity ) // '' ;
258 if ( defined ( my $cs = $entity -> head -> mime_attr ( "content-type.charset" ))) {
259 eval { $raw = decode
( $cs, $raw ); }; # ignore errors here
262 # create a well formed tree
263 my $tree = HTML
:: TreeBuilder-
> new ();
267 my $whtml = dump_html
( $tree, $viewimages ?
$cid_hash : {});
270 # remove dangerous/unneeded elements
271 my $scrubber = getscrubber
( $viewimages, $allowhref );
272 return $scrubber -> scrub ( $whtml );
274 } elsif ( $mime_type =~ m
|^ multipart
/| i
) {
279 foreach my $part ( $entity -> parts ) {
280 my $subtype = lc ( $part -> mime_type );
281 $multi_part = $part if ! defined ( $multi_part ) && $subtype =~ m
| multipart
/| i
;
282 $html_part = $part if ! defined ( $html_part ) && $subtype eq 'text/html' ;
283 $text_part = $part if ! defined ( $text_part ) && $subtype eq 'text/plain' ;
286 # get related/embedded images as data uris
287 $find_images ->( $cid_hash, $entity );
289 my $alt = $multi_part || $html_part || $text_part ;
291 return entity_to_html
( $alt, $cid_hash, $viewimages, $allowhref ) if $alt ;
298 my ( $path, $raw, $viewimages, $allowhref ) = @_ ;
300 my $dumpdir = "/tmp/.proxdumpview_ $$ " ;
307 my ( $header, $content ) = read_raw_email
( $path );
310 encode_entities
( $header ) .
312 encode_entities
( $content ) .
317 my $parser = new MIME
:: Parser
;
318 $parser -> extract_nested_messages ( 0 );
322 # Create and set the output directory:
323 (- d
$dumpdir || mkdir ( $dumpdir , 0755 )) ||
324 die "can't create $dumpdir : $! : ERROR" ;
326 die "can't write to directory $dumpdir : $! : ERROR" ;
328 $parser -> output_dir ( $dumpdir );
330 my $entity = $parser -> parse_open ( $path );
332 # bug fix for bin/tests/content/mimeparser.txt
333 if ( $entity -> mime_type =~ m
| multipart
/| i
&& ! $entity -> head -> multipart_boundary ) {
334 $entity -> head -> mime_attr ( 'Content-type' => "application/x-unparseable-multipart" );
337 $html = entity_to_html
( $entity, {}, $viewimages, $allowhref );
344 die "unable to parse mail: $err " if $err ;