]>
git.proxmox.com Git - pmg-api.git/blob - PMG/HTMLMail.pm
12 use HTML
:: TreeBuilder
;
16 my ( $tree, $cid_hash ) = @_ ;
20 my ( $tag, $node, $start, $depth );
26 $tag = $node ->{ '_tag' };
28 # try to open a new window when user activates a anchor
29 $node ->{ target
} = '_blank' if $tag eq 'a' ;
32 if ( $node ->{ src
} =~ m/^cid:(\S+)$/ ) {
33 if ( my $datauri = $cid_hash ->{ $1 }) {
34 $node ->{ src
} = $datauri ;
39 if ( $start ) { # on the way in
40 push ( @html, $node -> starttag );
43 push ( @html, $node -> endtag );
47 $node = encode_entities
( $node )
48 # That does magic things if $entities is undef.
49 unless $HTML :: Tagset
:: isCDATA_Parent
{ $_ [ 3 ]{ '_tag' } };
50 # To keep from amp-escaping children of script et al.
51 # That doesn't deal with descendants; but then, CDATA
52 # parents shouldn't /have/ descendants other than a
53 # text children (or comments?)
60 return join ( '' , @html, " \n " );
64 my ( $viewimages, $allowhref ) = @_ ;
66 # see http://web.archive.org/web/20110726052341/http://feedparser.org/docs/html-sanitization.html
68 my @allow = qw(a abbr acronym address area b big blockquote br button caption center cite code col colgroup dd del dfn dir div dl dt em fieldset font form h1 h2 h3 h4 h5 h6 head hr i img input ins kbd label legend li map menu ol optgroup option p pre q s samp select small span style strike strong sub sup title table tbody td textarea tfoot th thead tr tt u ul var html body) ;
70 my @rules = ( script
=> 0 );
73 0 => # default rule, deny all tags
75 '*' => 0 , # default rule, deny all attributes
78 'accept-charset' => 1 ,
106 # only allow http:// and https:// hrefs
107 'href' => $allowhref ?
qr{^https?://[^/]+/}i : 0 ,
135 src
=> $viewimages ?
qr{^(?!(?:java)?script)}i : 0 ,
151 my $scrubber = HTML
:: Scrubber-
> new (
154 default => \
@default,
165 my ( $path, $maxbytes ) = @_ ;
167 open ( my $fh, '<' , $path ) || die "unable to open ' $path ' - $!\n " ;
174 while ( defined ( my $line = < $fh >)) {
175 $raw_header .= $line ;
177 push @$header, $line ;
178 last if $line =~ m/^\s*$/ ;
181 my $head = MIME
:: Head-
> new ( $header );
183 my $cs = $head -> mime_attr ( "content-type.charset" );
187 while ( defined ( my $line = < $fh >)) {
188 $bytes += length ( $line );
190 $data .= decode
( $cs, $line );
194 if ( defined ( $maxbytes ) && ( $bytes >= $maxbytes )) {
195 $data .= " \n ... mail truncated (> $maxbytes bytes) \n " ;
202 return ( $raw_header, $data );
205 my $read_part = sub {
208 my $io = $part -> open ( "r" );
209 return undef if ! $io ;
212 while ( defined ( my $line = $io -> getline )) { $raw .= $line ; }
218 my $find_images = sub {
223 foreach my $part ( $entity -> parts ) {
224 if ( my $rawcid = $part -> head -> get ( 'Content-Id' )) {
225 if ( $rawcid =~ m/^\s*<(\S+)>\s*$/ ) {
227 my $ctype = $part -> head -> mime_attr ( 'Content-type' ) // '' ;
228 if ( $ctype =~ m!^image/! ) {
229 if ( defined ( my $raw = $read_part ->( $part ))) {
230 $res ->{ $cid } = "data: $ctype ;base64," . encode_base64
( $raw, '' );
241 my ( $entity, $cid_hash, $viewimages, $allowhref ) = @_ ;
243 my $mime_type = lc ( $entity -> mime_type );;
245 if ( $mime_type eq 'text/plain' ) {
246 my $raw = $read_part ->( $entity ) // '' ;
247 my $html = "<pre> \n " ;
249 if ( defined ( my $cs = $entity -> head -> mime_attr ( "content-type.charset" ))) {
250 $html .= PMG
:: Utils
:: decode_to_html
( $cs, $raw );
252 $html .= encode_entities
( $raw );
259 } elsif ( $mime_type eq 'text/html' ) {
260 my $raw = $read_part ->( $entity ) // '' ;
262 if ( defined ( my $cs = $entity -> head -> mime_attr ( "content-type.charset" ))) {
263 eval { $raw = decode
( $cs, $raw ); }; # ignore errors here
266 # create a well formed tree
267 my $tree = HTML
:: TreeBuilder-
> new ();
271 my $whtml = dump_html
( $tree, $viewimages ?
$cid_hash : {});
274 # remove dangerous/unneeded elements
275 my $scrubber = getscrubber
( $viewimages, $allowhref );
276 return $scrubber -> scrub ( $whtml );
278 } elsif ( $mime_type =~ m
|^ multipart
/| i
) {
283 foreach my $part ( $entity -> parts ) {
284 my $subtype = lc ( $part -> mime_type );
285 $multi_part = $part if ! defined ( $multi_part ) && $subtype =~ m
| multipart
/| i
;
286 $html_part = $part if ! defined ( $html_part ) && $subtype eq 'text/html' ;
287 $text_part = $part if ! defined ( $text_part ) && $subtype eq 'text/plain' ;
290 # get related/embedded images as data uris
291 my $cid_hash = $find_images ->( $entity );
293 my $alt = $multi_part || $html_part || $text_part ;
295 return entity_to_html
( $alt, $cid_hash, $viewimages, $allowhref ) if $alt ;
302 my ( $path, $raw, $viewimages, $allowhref ) = @_ ;
304 my $dumpdir = "/tmp/.proxdumpview_ $$ " ;
311 my ( $header, $content ) = read_raw_email
( $path );
314 encode_entities
( $header ) .
316 encode_entities
( $content ) .
321 my $parser = new MIME
:: Parser
;
322 $parser -> extract_nested_messages ( 0 );
326 # Create and set the output directory:
327 (- d
$dumpdir || mkdir ( $dumpdir , 0755 )) ||
328 die "can't create $dumpdir : $! : ERROR" ;
330 die "can't write to directory $dumpdir : $! : ERROR" ;
332 $parser -> output_dir ( $dumpdir );
334 my $entity = $parser -> parse_open ( $path );
336 # bug fix for bin/tests/content/mimeparser.txt
337 if ( $entity -> mime_type =~ m
| multipart
/| i
&& ! $entity -> head -> multipart_boundary ) {
338 $entity -> head -> mime_attr ( 'Content-type' => "application/x-unparseable-multipart" );
341 $html = entity_to_html
( $entity, {}, $viewimages, $allowhref );
348 die "unable to parse mail: $err " if $err ;