]>
git.proxmox.com Git - pmg-api.git/blob - src/PMG/HTMLMail.pm
12 use HTML
:: TreeBuilder
;
19 my ( $tree, $cid_hash ) = @_ ;
23 my ( $tag, $node, $start, $depth );
29 $tag = $node ->{ '_tag' };
31 # try to open a new window when user activates a anchor
32 $node ->{ target
} = '_blank' if $tag eq 'a' ;
35 if ( $node ->{ src
} && $node ->{ src
} =~ m/^cid:(\S+)$/ ) {
36 if ( my $datauri = $cid_hash ->{ $1 }) {
37 $node ->{ src
} = $datauri ;
42 if ( $start ) { # on the way in
43 push ( @html, $node -> starttag );
46 push ( @html, $node -> endtag );
50 $node = encode_entities
( $node )
51 # That does magic things if $entities is undef.
52 unless $HTML :: Tagset
:: isCDATA_Parent
{ $_ [ 3 ]{ '_tag' } };
53 # To keep from amp-escaping children of script et al.
54 # That doesn't deal with descendants; but then, CDATA
55 # parents shouldn't /have/ descendants other than a
56 # text children (or comments?)
63 return join ( '' , @html, " \n " );
67 my ( $viewimages, $allowhref ) = @_ ;
69 # see http://web.archive.org/web/20110726052341/http://feedparser.org/docs/html-sanitization.html
71 my @allow = qw(a abbr acronym address area b big blockquote br button caption center cite code col colgroup dd del dfn dir div dl dt em fieldset font form h1 h2 h3 h4 h5 h6 head hr i img input ins kbd label legend li map menu ol optgroup option p pre q s samp select small span style strike strong sub sup title table tbody td textarea tfoot th thead tr tt u ul var html body) ;
73 my @rules = ( script
=> 0 );
76 0 => # default rule, deny all tags
78 '*' => 0 , # default rule, deny all attributes
81 'accept-charset' => 1 ,
109 # only allow http:// and https:// hrefs
110 'href' => $allowhref ?
qr{^https?://[^/]+/}i : 0 ,
138 src
=> $viewimages ?
qr{^(?!(?:java)?script)}i : 0 ,
154 my $scrubber = HTML
:: Scrubber-
> new (
157 default => \
@default,
168 my ( $path, $maxbytes ) = @_ ;
170 open ( my $fh, '<' , $path ) || die "unable to open ' $path ' - $!\n " ;
177 while ( defined ( my $line = < $fh >)) {
178 $raw_header .= $line ;
180 push @$header, $line ;
181 last if $line =~ m/^\s*$/ ;
184 my $head = MIME
:: Head-
> new ( $header );
186 my $cs = $head -> mime_attr ( "content-type.charset" );
190 while ( defined ( my $line = < $fh >)) {
191 $bytes += length ( $line );
193 $data .= decode
( $cs, $line );
197 if ( defined ( $maxbytes ) && ( $bytes >= $maxbytes )) {
198 $data .= " \n ... mail truncated (> $maxbytes bytes) \n " ;
205 return ( $raw_header, $data );
208 my $read_part = sub {
211 my $io = $part -> open ( "r" );
212 return undef if ! $io ;
215 while ( defined ( my $line = $io -> getline )) { $raw .= $line ; }
221 my $find_images = sub {
222 my ( $cid_hash, $entity ) = @_ ;
224 foreach my $part ( $entity -> parts ) {
225 if ( my $rawcid = $part -> head -> get ( 'Content-Id' )) {
226 if ( $rawcid =~ m/^\s*<(\S+)>\s*$/ ) {
228 my $ctype = $part -> head -> mime_attr ( 'Content-type' ) // '' ;
229 if ( $ctype =~ m!^image/! ) {
230 if ( defined ( my $raw = $read_part ->( $part ))) {
231 $cid_hash ->{ $cid } = "data: $ctype ;base64," . encode_base64
( $raw, '' );
240 my ( $entity, $cid_hash, $viewimages, $allowhref ) = @_ ;
242 my $mime_type = lc ( $entity -> mime_type );;
244 if ( $mime_type eq 'text/plain' ) {
245 my $raw = $read_part ->( $entity ) // '' ;
246 my $html = "<pre> \n " ;
248 if ( defined ( my $cs = $entity -> head -> mime_attr ( "content-type.charset" ))) {
249 $html .= PMG
:: Utils
:: decode_to_html
( $cs, $raw );
251 $html .= encode_entities
( $raw );
258 } elsif ( $mime_type eq 'text/html' ) {
259 my $raw = $read_part ->( $entity ) // '' ;
261 if ( defined ( my $cs = $entity -> head -> mime_attr ( "content-type.charset" ))) {
262 eval { $raw = decode
( $cs, $raw ); }; # ignore errors here
265 # create a well formed tree
266 my $tree = HTML
:: TreeBuilder-
> new ();
270 my $whtml = dump_html
( $tree, $viewimages ?
$cid_hash : {});
273 # remove dangerous/unneeded elements
274 my $scrubber = getscrubber
( $viewimages, $allowhref );
275 return $scrubber -> scrub ( $whtml );
277 } elsif ( $mime_type =~ m
|^ multipart
/| i
) {
282 foreach my $part ( $entity -> parts ) {
283 my $subtype = lc ( $part -> mime_type );
284 $multi_part = $part if ! defined ( $multi_part ) && $subtype =~ m
| multipart
/| i
;
285 $html_part = $part if ! defined ( $html_part ) && $subtype eq 'text/html' ;
286 $text_part = $part if ! defined ( $text_part ) && $subtype eq 'text/plain' ;
289 # get related/embedded images as data uris
290 $find_images ->( $cid_hash, $entity );
292 my $alt = $multi_part || $html_part || $text_part ;
294 return entity_to_html
( $alt, $cid_hash, $viewimages, $allowhref ) if $alt ;
301 my ( $path, $raw, $viewimages, $allowhref ) = @_ ;
303 my $dumpdir = "/tmp/.proxdumpview_ $$ " ;
310 my ( $header, $content ) = read_raw_email
( $path );
313 encode_entities
( $header ) .
315 encode_entities
( $content ) .
320 my $parser = PMG
:: MIMEUtils
:: new_mime_parser
({
324 my $entity = $parser -> parse_open ( $path );
326 PMG
:: MIMEUtils
:: fixup_multipart
( $entity );
328 $html = entity_to_html
( $entity, {}, $viewimages, $allowhref );
335 die "unable to parse mail: $err " if $err ;