]>
Commit | Line | Data |
---|---|---|
91a5af02 | 1 | package PMG::Unpack; |
425ef29c DM |
2 | |
3 | use strict; | |
91a5af02 | 4 | use warnings; |
425ef29c DM |
5 | use IO::File; |
6 | use IO::Select; | |
7 | use Xdgmime; | |
91a5af02 DM |
8 | use Compress::Zlib qw(gzopen); |
9 | use Compress::Bzip2 qw(bzopen); | |
560c769e | 10 | use IO::Uncompress::Gunzip; |
425ef29c | 11 | use File::Path; |
91a5af02 | 12 | use File::Temp qw(tempdir); |
425ef29c DM |
13 | use File::Basename; |
14 | use File::stat; | |
15 | use POSIX ":sys_wait_h"; | |
91a5af02 | 16 | use Time::HiRes qw(usleep ualarm gettimeofday tv_interval); |
425ef29c DM |
17 | use Archive::Zip qw(:CONSTANTS :ERROR_CODES); |
18 | use LibArchive; | |
425ef29c DM |
19 | use MIME::Parser; |
20 | ||
91a5af02 | 21 | use PMG::Utils; |
18598b2c | 22 | use PMG::MIMEUtils; |
91a5af02 | 23 | |
425ef29c DM |
24 | my $unpackers = { |
25 | ||
26 | # TAR | |
27 | 'application/x-tar' => [ 'tar', \&unpack_tar, 1], | |
28 | #'application/x-tar' => [ 'tar', \&generic_unpack ], | |
29 | #'application/x-tar' => [ '7z', \&generic_unpack ], | |
30 | 'application/x-compressed-tar' => [ 'tar', \&unpack_tar, 1], | |
31 | ||
32 | # CPIO | |
33 | 'application/x-cpio' => [ 'cpio', \&unpack_tar, 1], | |
34 | #'application/x-cpio' => [ '7z', \&generic_unpack ], | |
35 | ||
36 | # ZIP | |
776097f1 DM |
37 | #'application/zip' => [ 'zip', \&unpack_tar, 1], |
38 | 'application/zip' => [ '7z', \&generic_unpack ], | |
425ef29c DM |
39 | |
40 | # 7z | |
41 | 'application/x-7z-compressed' => [ '7z', \&generic_unpack ], | |
42 | ||
43 | # RAR | |
776097f1 | 44 | 'application/vnd.rar' => [ '7z', \&generic_unpack ], |
425ef29c DM |
45 | |
46 | # ARJ | |
47 | 'application/x-arj' => [ '7z', \&generic_unpack ], | |
48 | ||
49 | # RPM | |
50 | 'application/x-rpm' => [ '7z', \&generic_unpack ], | |
51 | ||
52 | # DEB | |
776097f1 | 53 | 'application/vnd.debian.binary-package' => [ 'ar', \&unpack_tar, 1], |
425ef29c DM |
54 | |
55 | # MS CAB | |
56 | 'application/vnd.ms-cab-compressed' => [ '7z', \&generic_unpack ], | |
57 | ||
58 | # LZH/LHA | |
59 | 'application/x-lha' => [ '7z', \&generic_unpack ], | |
60 | ||
61 | # TNEF (winmail.dat) | |
62 | 'application/vnd.ms-tnef' => [ 'tnef', \&generic_unpack ], | |
63 | ||
64 | # message/rfc822 | |
65 | 'message/rfc822' => [ 'mime', \&unpack_mime ], | |
66 | ||
67 | ## CHM, Nsis - supported by 7z, but we currently do not | |
68 | ||
69 | ##'application/x-zoo' - old format - no support | |
70 | ##'application/x-ms-dos-executable' - exe should be blocked anyways - no support | |
71 | ## application/x-arc - old format - no support | |
72 | }; | |
73 | ||
74 | my $decompressors = { | |
776097f1 | 75 | 'application/gzip' => [ 'guzip', \&uncompress_file ], |
425ef29c DM |
76 | 'application/x-compress' => [ 'uncompress', \&uncompress_file ], |
77 | # 'application/x-compressed-tar' => [ 'guzip', \&uncompress_file ], # unpack_tar is faster | |
78 | 'application/x-tarz' => [ 'uncompress', \&uncompress_file ], | |
79 | 'application/x-bzip' => [ 'bunzip2', \&uncompress_file ], | |
80 | 'application/x-bzip-compressed-tar' => [ 'bunzip2', \&uncompress_file ], | |
425ef29c DM |
81 | }; |
82 | ||
83 | ||
84 | ## some helper methods | |
85 | ||
86 | sub min2 { | |
87 | return ( $_[0] < $_[1]) ? $_[0] : $_[1]; | |
88 | } | |
89 | ||
90 | sub max2 { | |
91 | return ( $_[0] > $_[1]) ? $_[0] : $_[1]; | |
92 | } | |
93 | ||
94 | # STDERR is redirected to STDOUT by default | |
95 | sub helper_pipe_open { | |
96 | my ($fh, $inputfilename, $errorfilename, @cmd) = @_; | |
97 | ||
98 | my $pid = $fh->open ('-|'); | |
776097f1 | 99 | |
425ef29c DM |
100 | die "unable to fork helper process: $!" if !defined $pid; |
101 | ||
102 | return $pid if ($pid != 0); # parent process simply returns | |
776097f1 | 103 | |
425ef29c DM |
104 | $inputfilename = '/dev/null' if !$inputfilename; |
105 | ||
1359baef | 106 | # same algorithm as used inside SA |
425ef29c DM |
107 | |
108 | my $fd = fileno (STDIN); | |
109 | close STDIN; | |
110 | POSIX::close(0) if $fd != 0; | |
111 | ||
112 | if (!open (STDIN, "<$inputfilename")) { | |
776097f1 DM |
113 | POSIX::_exit (1); |
114 | kill ('KILL', $$); | |
425ef29c DM |
115 | } |
116 | ||
117 | $errorfilename = '&STDOUT' if !$errorfilename; | |
118 | ||
119 | $fd = fileno(STDERR); | |
120 | close STDERR; | |
121 | POSIX::close(2) if $fd != 2; | |
122 | ||
123 | if (!open (STDERR, ">$errorfilename")) { | |
776097f1 DM |
124 | POSIX::_exit (1); |
125 | kill ('KILL', $$); | |
425ef29c DM |
126 | } |
127 | ||
128 | exec @cmd; | |
776097f1 | 129 | |
425ef29c DM |
130 | warn "exec failed"; |
131 | ||
776097f1 DM |
132 | POSIX::_exit (1); |
133 | kill('KILL', $$); | |
425ef29c DM |
134 | die; # else -w complains |
135 | } | |
136 | ||
137 | sub helper_pipe_consume { | |
138 | my ($cfh, $pid, $timeout, $bufsize, $callback) = @_; | |
139 | ||
140 | eval { | |
141 | run_with_timeout ($timeout, sub { | |
142 | if ($bufsize) { | |
143 | my $buf; | |
144 | my $count; | |
145 | ||
146 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
147 | &$callback ($buf, $count); | |
148 | } | |
149 | die "pipe read failed" if ($count < 0); | |
150 | ||
151 | } else { | |
152 | while (my $line = <$cfh>) { | |
153 | &$callback ($line); | |
154 | } | |
155 | } | |
156 | }); | |
157 | }; | |
158 | ||
159 | my $err = $@; | |
776097f1 | 160 | |
425ef29c DM |
161 | # send TERM first if process still exits |
162 | if ($err) { | |
163 | kill (15, $pid) if kill (0, $pid); | |
164 | ||
165 | # read remaining data, if any | |
166 | my ($count, $buf); | |
167 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
168 | # do nothing | |
169 | } | |
170 | } | |
776097f1 | 171 | |
425ef29c DM |
172 | # then close pipe |
173 | my $closeerr; | |
174 | close ($cfh) || ($closeerr = $!); | |
175 | my $childstat = $?; | |
176 | ||
177 | # still alive ? | |
178 | if (kill (0, $pid)) { | |
179 | sleep (1); | |
180 | kill (9, $pid); # terminate process | |
181 | die "child '$pid' termination problems\n"; | |
182 | } | |
183 | ||
184 | die $err if $err; | |
185 | ||
186 | die "child '$pid' close failed - $closeerr\n" if $closeerr; | |
187 | ||
188 | die "child '$pid' failed: $childstat\n" if $childstat; | |
189 | } | |
190 | ||
191 | sub run_with_timeout { | |
192 | my ($timeout, $code, @param) = @_; | |
193 | ||
194 | die "got timeout\n" if $timeout <= 0; | |
195 | ||
196 | my $prev_alarm; | |
197 | ||
198 | my $sigcount = 0; | |
199 | ||
200 | my $res; | |
201 | ||
202 | eval { | |
203 | local $SIG{ALRM} = sub { $sigcount++; die "got timeout\n"; }; | |
204 | local $SIG{PIPE} = sub { $sigcount++; die "broken pipe\n" }; | |
205 | local $SIG{__DIE__}; # see SA bug 4631 | |
206 | ||
207 | $prev_alarm = alarm ($timeout); | |
208 | ||
209 | $res = &$code (@param); | |
210 | ||
211 | alarm 0; # avoid race conditions | |
212 | }; | |
213 | ||
214 | my $err = $@; | |
776097f1 | 215 | |
425ef29c DM |
216 | alarm ($prev_alarm) if defined ($prev_alarm); |
217 | ||
218 | die "unknown error" if $sigcount && !$err; # seems to happen sometimes | |
219 | ||
220 | die $err if $err; | |
221 | ||
222 | return $res; | |
223 | } | |
224 | ||
225 | # the unpacker object constructor | |
226 | ||
227 | sub new { | |
228 | my ($type, %param) = @_; | |
229 | ||
230 | my $self = {}; | |
231 | bless $self, $type; | |
232 | ||
233 | $self->{tmpdir} = $param{tmpdir} || tempdir (CLEANUP => 1); | |
234 | $self->{starttime} = [gettimeofday]; | |
235 | $self->{timeout} = $param{timeout} || 3600*24; | |
236 | ||
237 | # maxfiles: 0 = disabled | |
238 | $self->{maxfiles} = defined ($param{maxfiles}) ? $param{maxfiles} : 1000; | |
239 | ||
240 | $param{maxrec} = 0 if !defined ($param{maxrec}); | |
241 | if ($param{maxrec} < 0) { | |
242 | $param{maxrec} = - $param{maxrec}; | |
243 | $self->{maxrec_soft} = 1; # do not die when limit reached | |
244 | } | |
245 | ||
246 | $self->{maxrec} = $param{maxrec} || 8; # 0 = disabled | |
247 | $self->{maxratio} = $param{maxratio} || 0; # 0 = disabled | |
248 | ||
776097f1 | 249 | $self->{maxquota} = $param{quota} || 250*1024*1024; # 250 MB |
425ef29c DM |
250 | |
251 | $self->{ctonly} = $param{ctonly}; # only detect contained content types | |
252 | ||
253 | # internal | |
776097f1 | 254 | $self->{quota} = 0; |
425ef29c DM |
255 | $self->{ratioquota} = 0; |
256 | $self->{size} = 0; | |
257 | $self->{files} = 0; | |
258 | $self->{levels} = 0; | |
259 | ||
260 | $self->{debug} = $param{debug} || 0; | |
261 | ||
262 | $self->{mime} = {}; | |
29d30ef8 | 263 | $self->{filenames} = {}; |
425ef29c | 264 | |
1359baef TL |
265 | $self->{ufid} = 0; # counter to create unique file names |
266 | $self->{udid} = 0; # counter to create unique dir names | |
267 | $self->{ulid} = 0; # counter to create unique link names | |
425ef29c DM |
268 | |
269 | $self->{todo} = []; | |
270 | ||
271 | return $self; | |
272 | } | |
273 | ||
274 | sub cleanup { | |
275 | my $self = shift; | |
276 | ||
277 | if ($self->{debug}) { | |
278 | system ("find '$self->{tmpdir}'"); | |
279 | } | |
280 | ||
281 | rmtree ($self->{tmpdir}); | |
282 | } | |
283 | ||
284 | sub DESTROY { | |
285 | my $self = shift; | |
286 | ||
287 | rmtree ($self->{tmpdir}); | |
288 | } | |
289 | ||
290 | ||
291 | sub uncompress_file { | |
292 | my ($self, $app, $filename, $newname, $csize, $filesize) = @_; | |
293 | ||
294 | my $timeout = $self->check_timeout(); | |
295 | ||
296 | my $maxsize = $self->{quota} - $self->{size}; | |
297 | ||
298 | if ($self->{maxratio}) { | |
299 | $maxsize = min2 ($maxsize, $filesize * $self->{maxratio}); | |
300 | } | |
301 | ||
560c769e LN |
302 | if($app eq 'guzip' && (my $z = IO::Uncompress::Gunzip->new($filename))) { |
303 | # the name (FNAME) field is optional in GZIP archives, so we won't | |
304 | # always have a value here | |
305 | my $header = $z->getHeaderInfo(); | |
306 | $self->add_glob_mime_type($header->{Name}) if $header->{Name}; | |
307 | } | |
308 | ||
425ef29c DM |
309 | $self->add_glob_mime_type ($newname); |
310 | ||
311 | my $outfd; | |
312 | ||
313 | my $usize = 0; | |
314 | my $err; | |
315 | my $ct; | |
316 | my $todo = 1; | |
317 | ||
318 | if ($app eq 'guzip' || $app eq 'bunzip2') { | |
319 | ||
320 | my $cfh; | |
776097f1 | 321 | |
425ef29c DM |
322 | eval { |
323 | ||
324 | # bzip provides a gz compatible interface | |
776097f1 | 325 | if ($app eq 'bunzip2') { |
425ef29c DM |
326 | $self->{mime}->{'application/x-bzip'} = 1; |
327 | $cfh = bzopen ("$filename", 'r'); | |
328 | die "bzopen '$filename' failed" if !$cfh; | |
329 | } else { | |
776097f1 | 330 | $self->{mime}->{'application/gzip'} = 1; |
425ef29c DM |
331 | $cfh = gzopen ("$filename", 'rb'); |
332 | die "gzopen '$filename' failed" if !$cfh; | |
333 | } | |
776097f1 | 334 | |
425ef29c DM |
335 | run_with_timeout ($timeout, sub { |
336 | my $count; | |
337 | my $buf; | |
338 | while (($count = $cfh->gzread ($buf, 128*1024)) > 0) { | |
339 | ||
340 | if (!$usize) { | |
341 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count); | |
342 | ||
343 | $usize += $count; | |
344 | $self->{mime}->{$ct} = 1; | |
345 | ||
346 | if (!is_archive ($ct)) { | |
347 | $todo = 0; | |
348 | ||
349 | # warning: this can lead to wrong size/quota test | |
350 | last if $self->{ctonly}; | |
351 | } | |
352 | } else { | |
353 | $usize += $count; | |
354 | } | |
355 | ||
356 | $self->check_comp_ratio ($filesize, $usize); | |
357 | ||
358 | $self->check_quota (1, $usize, $csize); | |
359 | ||
360 | if (!$outfd) { | |
361 | $outfd = IO::File->new; | |
362 | ||
363 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
364 | die "unable to create file $newname: $!"; | |
365 | } | |
366 | } | |
367 | ||
368 | if (!$outfd->print ($buf)) { | |
369 | die "unable to write '$newname' - $!"; | |
370 | } | |
371 | } | |
372 | if ($count < 0) { | |
373 | die "gzread failed"; | |
374 | } | |
375 | }); | |
376 | }; | |
377 | ||
378 | $err = $@; | |
379 | ||
380 | $cfh->gzclose(); | |
381 | ||
382 | } elsif ($app eq 'uncompress') { | |
383 | ||
384 | $self->{mime}->{'application/x-compress'} = 1; | |
385 | ||
386 | eval { | |
387 | my @cmd = ('/bin/gunzip', '-c', $filename); | |
388 | my $cfh = IO::File->new(); | |
389 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @cmd); | |
390 | ||
391 | helper_pipe_consume ($cfh, $pid, $timeout, 128*1024, sub { | |
392 | my ($buf, $count) = @_; | |
393 | ||
394 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count) if (!$usize); | |
395 | ||
396 | $usize += $count; | |
397 | ||
398 | $self->check_comp_ratio ($filesize, $usize); | |
399 | ||
400 | $self->check_quota (1, $usize, $csize); | |
401 | ||
402 | if (!$outfd) { | |
403 | $outfd = IO::File->new; | |
404 | ||
405 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
406 | die "unable to create file $newname: $!"; | |
407 | } | |
408 | } | |
409 | ||
410 | if (!$outfd->print ($buf)) { | |
411 | die "unable to write '$newname' - $!"; | |
412 | } | |
413 | ||
414 | }); | |
415 | }; | |
776097f1 | 416 | |
425ef29c DM |
417 | $err = $@; |
418 | } | |
419 | ||
420 | $outfd->close () if $outfd; | |
421 | ||
422 | if ($err) { | |
423 | unlink $newname; | |
424 | die $err; | |
425 | } | |
426 | ||
427 | $self->check_quota (1, $usize, $csize, 1); | |
428 | ||
429 | $self->todo_list_add ($newname, $ct, $usize); | |
430 | ||
431 | return $newname; | |
432 | }; | |
433 | ||
434 | # calculate real filesystem space (needed by ext3 to store files/dirs) | |
435 | sub realsize { | |
436 | my ($size, $isdir) = @_; | |
437 | ||
438 | my $bs = 4096; # ext3 block size | |
439 | ||
440 | $size = max2 ($size, $bs) if $isdir; # dirs needs at least one block | |
441 | ||
442 | return int (($size + $bs - 1) / $bs) * $bs; # round up to block size | |
443 | } | |
444 | ||
445 | sub todo_list_add { | |
446 | my ($self, $filename, $ct, $size) = @_; | |
447 | ||
448 | if ($ct) { | |
449 | $self->{mime}->{$ct} = 1; | |
450 | if (is_archive ($ct)) { | |
451 | push @{$self->{todo}}, [$filename, $ct, $size]; | |
452 | } | |
453 | } | |
454 | } | |
455 | ||
456 | sub check_timeout { | |
457 | my ($self) = @_; | |
776097f1 | 458 | |
425ef29c DM |
459 | my $elapsed = int (tv_interval ($self->{starttime})); |
460 | my $timeout = $self->{timeout} - $elapsed; | |
461 | ||
462 | die "got timeout\n" if $timeout <= 0; | |
463 | ||
464 | return $timeout; | |
465 | } | |
466 | ||
467 | sub check_comp_ratio { | |
468 | my ($self, $compsize, $usize) = @_; | |
469 | ||
470 | return if !$compsize || !$self->{maxratio}; | |
471 | ||
472 | my $ratio = $usize/$compsize; | |
473 | ||
1359baef | 474 | die "compression ratio too large (> $self->{maxratio})" |
425ef29c DM |
475 | if $ratio > $self->{maxratio}; |
476 | } | |
477 | ||
478 | sub check_quota { | |
479 | my ($self, $files, $size, $csize, $commit) = @_; | |
480 | ||
481 | my $sizediff = $csize ? $size - $csize : $size; | |
482 | ||
1359baef | 483 | die "compression ratio too large (> $self->{maxratio})" |
425ef29c DM |
484 | if $self->{maxratio} && (($self->{size} + $sizediff) > $self->{ratioquota}); |
485 | ||
776097f1 | 486 | die "archive too large (> $self->{quota})" |
425ef29c DM |
487 | if ($self->{size} + $sizediff) > $self->{quota}; |
488 | ||
489 | die "unexpected number of files '$files'" if $files <= 0; | |
490 | ||
491 | $files-- if ($csize); | |
492 | ||
493 | die "too many files in archive (> $self->{maxfiles})" | |
494 | if $self->{maxfiles} && (($self->{files} + $files) > $self->{maxfiles}); | |
495 | ||
496 | if ($commit) { | |
497 | $self->{files} += $files; | |
498 | $self->{size} += $sizediff; | |
499 | } | |
776097f1 | 500 | |
425ef29c DM |
501 | } |
502 | ||
503 | sub add_glob_mime_type { | |
504 | my ($self, $filename) = @_; | |
505 | ||
29d30ef8 DC |
506 | my $basename = basename($filename); |
507 | $self->{filenames}->{$basename} = 1; | |
508 | ||
509 | if (my $ct = xdg_mime_get_mime_type_from_file_name($basename)) { | |
425ef29c DM |
510 | $self->{mime}->{$ct} = 1 if $ct ne 'application/octet-stream'; |
511 | } | |
512 | } | |
513 | ||
425ef29c DM |
514 | sub unpack_mime { |
515 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
516 | ||
517 | my $size = 0; | |
518 | my $files = 0; | |
519 | ||
520 | my $timeout = $self->check_timeout(); | |
521 | ||
522 | eval { | |
523 | run_with_timeout ($timeout, sub { | |
776097f1 | 524 | |
425ef29c | 525 | # Create a new MIME parser: |
18598b2c | 526 | my $max; |
425ef29c | 527 | if ($self->{maxfiles}) { |
18598b2c | 528 | $max = $self->{maxfiles} - $self->{files}; |
425ef29c DM |
529 | } |
530 | ||
18598b2c DC |
531 | my $parser = PMG::MIMEUtils::new_mime_parser({ |
532 | dumpdir => $tmpdir, | |
533 | nested => 1, | |
534 | ignore_errors => 1, | |
535 | extract_uuencode => 1, | |
536 | ignore_filename => 1, | |
537 | maxfiles => $max, | |
538 | }, 1); | |
539 | ||
425ef29c DM |
540 | my $entity = $parser->parse_open ($filename); |
541 | ||
18598b2c DC |
542 | PMG::MIMEUtils::traverse_mime_parts($entity, sub { |
543 | my ($part) = @_; | |
544 | my $ct = $part->head->mime_attr('content-type'); | |
545 | $self->{mime}->{$ct} = 1 if $ct && length($ct) < 256; | |
776097f1 | 546 | |
18598b2c DC |
547 | if (my $body = $part->bodyhandle) { |
548 | my $path = $body->path; | |
549 | $size += -s $path; | |
550 | $files++; | |
551 | } | |
552 | }); | |
425ef29c DM |
553 | }); |
554 | }; | |
555 | ||
556 | my $err = $@; | |
557 | ||
558 | die $err if $err; | |
559 | ||
560 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
561 | ||
562 | return 1; | |
563 | ||
564 | } | |
565 | ||
566 | sub unpack_zip { | |
567 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
568 | ||
569 | my $size = 0; | |
570 | my $files = 0; | |
571 | ||
572 | my $timeout = $self->check_timeout(); | |
573 | ||
574 | eval { | |
575 | ||
576 | my $zip = Archive::Zip->new (); | |
577 | ||
578 | Archive::Zip::setErrorHandler (sub { die @_ }); | |
579 | ||
580 | run_with_timeout ($timeout, sub { | |
581 | ||
582 | my $status = $zip->read ($filename); | |
583 | die "unable to open zip file '$filename'" if $status != AZ_OK; | |
776097f1 | 584 | |
425ef29c DM |
585 | my $tid = 1; |
586 | foreach my $mem ($zip->members) { | |
587 | ||
588 | $files++; | |
589 | ||
590 | my $cm = $mem->compressionMethod(); | |
591 | die "unsupported zip compression method '$cm'\n" | |
776097f1 | 592 | if !(($cm == COMPRESSION_DEFLATED || |
425ef29c DM |
593 | $cm == COMPRESSION_STORED)); |
594 | ||
595 | die "encrypted archive detected\n" | |
596 | if $mem->isEncrypted(); | |
597 | ||
598 | my $us = $mem->uncompressedSize(); | |
599 | ||
600 | next if $us <= 0; # skip zero size files | |
601 | ||
602 | if ($mem->isDirectory) { | |
603 | $size += realsize ($us, 1); | |
604 | } else { | |
605 | $size += realsize ($us); | |
606 | } | |
607 | ||
608 | $self->check_comp_ratio ($filesize, $size); | |
609 | ||
610 | $self->check_quota ($files, $size, $csize); | |
611 | ||
612 | next if $mem->isDirectory; # skip dirs | |
613 | ||
614 | my $name = basename ($mem->fileName()); | |
615 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
616 | my $newfn = sprintf "$tmpdir/Z%08d_$name", $tid++; | |
617 | ||
618 | $self->add_glob_mime_type ($name); | |
619 | ||
620 | my $outfd = IO::File->new; | |
621 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
776097f1 | 622 | die "unable to create file $newfn: $!"; |
425ef29c DM |
623 | } |
624 | ||
625 | my $ct; | |
626 | ||
627 | eval { | |
628 | ||
629 | $mem->desiredCompressionMethod (COMPRESSION_STORED); | |
630 | ||
631 | $status = $mem->rewindData(); | |
632 | ||
633 | die "unable to rewind zip stream" if $status != AZ_OK; | |
634 | ||
635 | my $outRef; | |
636 | my $bytes = 0; | |
637 | while ($status == AZ_OK) { | |
638 | ($outRef, $status) = $mem->readChunk(); | |
776097f1 | 639 | die "unable to read zip member" |
425ef29c DM |
640 | if ($status != AZ_OK && $status != AZ_STREAM_END); |
641 | ||
642 | my $len = length ($$outRef); | |
643 | if ($len > 0) { | |
644 | $ct = xdg_mime_get_mime_type_for_data ($$outRef, $len) if (!$bytes); | |
645 | $outfd->print ($$outRef) || die "write error during zip copy"; | |
646 | $bytes += $len; | |
647 | } | |
648 | ||
649 | last if $status == AZ_STREAM_END; | |
650 | } | |
651 | ||
776097f1 | 652 | $mem->endRead(); |
425ef29c DM |
653 | |
654 | $self->todo_list_add ($newfn, $ct, $bytes); | |
655 | ||
656 | }; | |
657 | ||
658 | my $err = $@; | |
659 | ||
660 | $outfd->close (); | |
661 | ||
662 | if ($err) { | |
663 | unlink $newfn; | |
664 | die $err; | |
665 | } | |
666 | } | |
667 | }); | |
668 | }; | |
669 | ||
670 | my $err = $@; | |
671 | ||
672 | die $err if $err; | |
673 | ||
674 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
675 | ||
676 | return 1; | |
677 | } | |
678 | ||
679 | sub unpack_tar { | |
680 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
681 | ||
682 | my $size = 0; | |
683 | my $files = 0; | |
684 | ||
685 | my $timeout = $self->check_timeout(); | |
686 | ||
687 | my $a = LibArchive::archive_read_new(); | |
688 | ||
689 | die "unable to create LibArchive object" if !$a; | |
690 | ||
691 | LibArchive::archive_read_support_format_all ($a); | |
2e08e3c9 | 692 | LibArchive::archive_read_support_filter_all ($a); |
425ef29c DM |
693 | |
694 | eval { | |
695 | run_with_timeout ($timeout, sub { | |
696 | ||
697 | if ((my $r = LibArchive::archive_read_open_filename ($a, $filename, 10240))) { | |
776097f1 | 698 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); |
425ef29c DM |
699 | } |
700 | my $tid = 1; | |
701 | for (;;) { | |
702 | my $entry; | |
703 | my $r = LibArchive::archive_read_next_header ($a, $entry); | |
704 | ||
705 | last if ($r == LibArchive::ARCHIVE_EOF); | |
706 | ||
707 | if ($r != LibArchive::ARCHIVE_OK) { | |
708 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); | |
709 | } | |
710 | ||
711 | my $us = LibArchive::archive_entry_size ($entry); | |
712 | my $mode = LibArchive::archive_entry_mode ($entry); | |
713 | ||
714 | my $rs; | |
715 | if (POSIX::S_ISREG ($mode)) { | |
716 | $rs = realsize ($us); | |
717 | } else { | |
718 | $rs = POSIX::S_ISDIR ($mode) ? realsize ($us, 1) : 256; | |
719 | } | |
720 | $size += $rs; | |
721 | $files += 1; | |
722 | ||
723 | $self->check_comp_ratio ($filesize, $size); | |
724 | ||
725 | $self->check_quota ($files, $size, $csize); | |
726 | ||
727 | next if POSIX::S_ISDIR ($mode); | |
728 | next if !POSIX::S_ISREG ($mode); | |
729 | ||
730 | my $name = basename (LibArchive::archive_entry_pathname ($entry)); | |
731 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
732 | my $newfn = sprintf "$tmpdir/A%08d_$name", $tid++; | |
733 | ||
734 | $self->add_glob_mime_type ($name); | |
735 | ||
736 | my $outfd; | |
737 | ||
738 | eval { | |
739 | my $bytes = 0; | |
740 | my $ct; | |
741 | my $todo = 1; | |
742 | ||
743 | if ($us > 0) { | |
744 | my $len; | |
745 | my $buf; | |
746 | while (($len = LibArchive::archive_read_data($a, $buf, 128*1024)) > 0) { | |
776097f1 | 747 | |
425ef29c DM |
748 | if (!$bytes) { |
749 | if ($ct = xdg_mime_get_mime_type_for_data ($buf, $len)) { | |
750 | $self->{mime}->{$ct} = 1; | |
751 | ||
752 | if (!is_archive ($ct)) { | |
753 | $todo = 0; | |
754 | last if $self->{ctonly}; | |
755 | } | |
756 | } | |
757 | } | |
776097f1 | 758 | |
425ef29c DM |
759 | $bytes += $len; |
760 | ||
761 | if (!$outfd) { # create only when needed | |
762 | $outfd = IO::File->new; | |
776097f1 | 763 | |
425ef29c DM |
764 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { |
765 | die "unable to create file $newfn: $!"; | |
766 | } | |
767 | } | |
768 | ||
769 | if (!$outfd->print ($buf)) { | |
770 | die "unable to write '$newfn' - $!"; | |
771 | } | |
772 | } | |
773 | ||
774 | die ("error reading archive (encrypted)\n") | |
775 | if ($len < 0); | |
776 | } | |
777 | ||
778 | $self->todo_list_add ($newfn, $ct, $bytes) if $todo; | |
779 | }; | |
780 | ||
781 | my $err = $@; | |
782 | ||
783 | $outfd->close () if $outfd; | |
776097f1 | 784 | |
425ef29c DM |
785 | if ($err) { |
786 | unlink $newfn; | |
787 | die $err; | |
788 | } | |
776097f1 | 789 | } |
425ef29c DM |
790 | }); |
791 | }; | |
792 | ||
793 | my $err = $@; | |
794 | ||
795 | LibArchive::archive_read_close($a); | |
2e08e3c9 | 796 | LibArchive::archive_read_free($a); |
776097f1 | 797 | |
425ef29c DM |
798 | die $err if $err; |
799 | ||
800 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
801 | ||
802 | return 1; | |
803 | } | |
804 | ||
805 | sub generic_unpack { | |
806 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
807 | ||
808 | my $size = 0; | |
809 | my $files = 0; | |
810 | ||
811 | my $timeout = $self->check_timeout(); | |
812 | ||
813 | my @listcmd; | |
814 | my @restorecmd = ('/bin/false'); | |
815 | ||
776097f1 | 816 | my $filter; |
425ef29c DM |
817 | |
818 | if ($app eq 'tar') { | |
819 | @listcmd = ('/bin/tar', '-tvf', $filename); | |
820 | @restorecmd = ('/bin/tar', '-x', '--backup=number', "--transform='s,[^A-Za-z0-9\./],-,g'", '-o', | |
821 | '-m', '-C', $tmpdir, '-f', $filename); | |
822 | $filter = sub { | |
823 | my $line = shift; | |
824 | if ($line =~ m/^(\S)\S+\s+\S+\s+([\d,\.]+)\s+\S+/) { | |
825 | my ($type, $bytes) = ($1, $2); | |
826 | $bytes =~ s/[,\.]//g; | |
827 | ||
828 | if ($type eq 'd') { | |
829 | $bytes = realsize ($bytes, 1); | |
830 | } elsif ($type eq '-') { | |
831 | $bytes = realsize ($bytes); | |
832 | } else { | |
833 | $bytes = 256; # simple assumption | |
834 | } | |
835 | ||
836 | $size += $bytes; | |
837 | $files++; | |
838 | ||
839 | $self->check_comp_ratio ($filesize, $size); | |
840 | $self->check_quota ($files, $size, $csize); | |
841 | ||
842 | } else { | |
843 | die "can't parse tar output: $line\n"; | |
844 | } | |
845 | } | |
846 | } elsif ($app eq '7z' || $app eq '7zsimple') { | |
847 | # Note: set password to 'none' with '-pnone', to avoid reading from /dev/tty | |
848 | @restorecmd = ('/usr/bin/7z', 'e', '-pnone', '-bd', '-y', '-aou', "-w$self->{tmpdir}", "-o$tmpdir", $filename); | |
849 | ||
850 | @listcmd = ('/usr/bin/7z', 'l', '-slt', $filename); | |
851 | ||
852 | my ($path, $folder, $bytes); | |
853 | ||
854 | $filter = sub { | |
855 | my $line = shift; | |
856 | chomp $line; | |
857 | ||
858 | if ($line =~ m/^\s*\z/) { | |
859 | if (defined ($path) && defined ($bytes)) { | |
860 | $bytes = realsize ($bytes, $folder); | |
861 | $size += $bytes; | |
862 | $files++; | |
863 | ||
864 | $self->check_comp_ratio ($filesize, $size); | |
865 | $self->check_quota ($files, $size, $csize); | |
866 | } | |
867 | undef $path; | |
868 | undef $folder; | |
869 | undef $bytes; | |
776097f1 | 870 | |
425ef29c DM |
871 | } elsif ($line =~ m/^Path = (.*)\z/s) { |
872 | $path = $1; | |
873 | } elsif ($line =~ m/^Size = (\d+)\z/s) { | |
874 | $bytes = $1; | |
875 | } elsif ($line =~ m/^Folder = (\d+)\z/s) { | |
876 | $folder = $1; | |
877 | } elsif ($line =~ m/^Attributes = ([D\.][R\.][H\.][S\.][A\.])\z/s) { | |
878 | $folder = 1 if $1 && substr ($1, 0, 1) eq 'D'; | |
879 | } | |
880 | }; | |
881 | ||
882 | } elsif ($app eq 'tnef') { | |
883 | @listcmd = ('/usr/bin/tnef', '-tv', '-f', $filename); | |
884 | @restorecmd = ('/usr/bin/tnef', '-C', $tmpdir, '--number-backups', '-f', $filename); | |
885 | ||
886 | $filter = sub { | |
887 | my $line = shift; | |
888 | chomp $line; | |
889 | ||
890 | if ($line =~ m!^\s*(\d+)\s*|\s*\d{4}/\d{1,2}/\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2}\s*|!) { | |
891 | my $bytes = $1; | |
892 | ||
893 | $bytes = realsize ($bytes); | |
894 | $size += $bytes; | |
895 | $files++; | |
896 | ||
897 | $self->check_comp_ratio ($filesize, $size); | |
898 | $self->check_quota ($files, $size, $csize); | |
899 | } else { | |
900 | die "can't parse tnef output\n"; | |
901 | } | |
902 | ||
903 | }; | |
904 | ||
905 | } else { | |
906 | die "unknown application '$app'"; | |
907 | } | |
908 | ||
909 | eval { | |
910 | ||
911 | my $cfh = IO::File->new(); | |
912 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @listcmd); | |
776097f1 | 913 | |
425ef29c DM |
914 | helper_pipe_consume ($cfh, $pid, $timeout, 0, $filter); |
915 | }; | |
916 | ||
917 | my $err = $@; | |
918 | ||
919 | die $err if $err; | |
920 | ||
921 | return if !$files; # empty archive | |
922 | ||
923 | $self->check_quota ($files, $size, $csize, 1); | |
924 | ||
925 | $timeout = $self->check_timeout(); | |
926 | ||
927 | my $cfh = IO::File->new(); | |
928 | my $pid = helper_pipe_open ($cfh, '/dev/null', undef, @restorecmd); | |
929 | helper_pipe_consume ($cfh, $pid, $timeout, 0, sub { | |
930 | my $line = shift; | |
931 | print "$app: $line" if $self->{debug}; | |
932 | }); | |
933 | ||
934 | return 1; | |
935 | } | |
936 | ||
937 | sub unpack_dir { | |
938 | my ($self, $dirname, $level) = @_; | |
939 | ||
940 | local (*DIR); | |
941 | ||
942 | print "unpack dir '$dirname'\n" if $self->{debug}; | |
943 | ||
944 | opendir(DIR, $dirname) || die "can't opendir $dirname: $!"; | |
945 | ||
946 | my $name; | |
947 | ||
776097f1 | 948 | while (defined ($name = readdir (DIR))) { |
425ef29c DM |
949 | my $path = "$dirname/$name"; |
950 | my $st = lstat ($path); | |
951 | ||
952 | if (!$st) { | |
953 | die "no such file '$path' - $!"; | |
954 | } elsif (POSIX::S_ISDIR ($st->mode)) { | |
955 | next if ($name eq '.' || $name eq '..'); | |
956 | $self->unpack_dir ($path, $level); | |
957 | } elsif (POSIX::S_ISREG ($st->mode)) { | |
958 | my $size = $st->size; | |
959 | $self->__unpack_archive ($path, $level + 1, $size); | |
960 | } | |
961 | } | |
962 | ||
963 | closedir DIR; | |
964 | } | |
965 | ||
966 | sub unpack_todo { | |
967 | my ($self, $level) = @_; | |
968 | ||
969 | my $ta = $self->{todo}; | |
970 | $self->{todo} = []; | |
971 | ||
972 | foreach my $todo (@$ta) { | |
973 | $self->__unpack_archive ($todo->[0], $level, $todo->[2], $todo->[1]); | |
974 | } | |
975 | } | |
976 | ||
977 | sub __unpack_archive { | |
978 | my ($self, $filename, $level, $size, $ct) = @_; | |
979 | ||
980 | $level = 0 if !$level; | |
981 | ||
776097f1 | 982 | $self->{levels} = max2($self->{levels}, $level); |
425ef29c DM |
983 | |
984 | if ($self->{maxrec} && ($level >= $self->{maxrec})) { | |
776097f1 | 985 | return if $self->{maxrec_soft}; |
425ef29c DM |
986 | die "max recursion limit reached\n"; |
987 | } | |
988 | ||
989 | die "undefined file size" if !defined ($size); | |
990 | ||
991 | return if !$size; # nothing to do | |
992 | ||
993 | if (!$ct) { | |
91a5af02 DM |
994 | $ct = PMG::Utils::magic_mime_type_for_file($filename); |
995 | $self->add_glob_mime_type($filename); | |
425ef29c DM |
996 | } |
997 | ||
998 | if ($ct) { | |
999 | $self->{mime}->{$ct} = 1; | |
1000 | ||
776097f1 | 1001 | if (defined($decompressors->{$ct})) { |
425ef29c DM |
1002 | |
1003 | my ($app, $code) = @{$decompressors->{$ct}}; | |
1004 | ||
1005 | if ($app) { | |
1006 | ||
1007 | # we try to keep extension correctly | |
776097f1 DM |
1008 | my $tmp = basename($filename); |
1009 | ($ct eq 'application/gzip') && | |
425ef29c | 1010 | $tmp =~ s/\.gz\z//; |
776097f1 | 1011 | ($ct eq 'application/x-bzip') && |
425ef29c | 1012 | $tmp =~ s/\.bz2?\z//; |
776097f1 | 1013 | ($ct eq 'application/x-compress') && |
425ef29c | 1014 | $tmp =~ s/\.Z\z//; |
776097f1 | 1015 | ($ct eq 'application/x-compressed-tar') && |
425ef29c | 1016 | $tmp =~ s/\.gz\z// || $tmp =~ s/\.tgz\z/.tar/; |
776097f1 | 1017 | ($ct eq 'application/x-bzip-compressed-tar') && |
425ef29c DM |
1018 | $tmp =~ s/\.bz2?\z// || $tmp =~ s/\.tbz\z/.tar/; |
1019 | ($ct eq 'application/x-tarz') && | |
1020 | $tmp =~ s/\.Z\z//; | |
1021 | ||
776097f1 | 1022 | my $newname = sprintf "%s/DC_%08d_%s", $self->{tmpdir}, ++$self->{ufid}, $tmp; |
425ef29c DM |
1023 | |
1024 | print "Decomp: $filename\n\t($ct) with $app to $newname\n" | |
776097f1 | 1025 | if $self->{debug}; |
425ef29c | 1026 | |
776097f1 | 1027 | if (my $res = &$code($self, $app, $filename, $newname, $level ? $size : 0, $size)) { |
425ef29c DM |
1028 | unlink $filename if $level; |
1029 | $self->unpack_todo ($level + 1); | |
1030 | } | |
1031 | } | |
1032 | } elsif (defined ($unpackers->{$ct})) { | |
1033 | ||
1034 | my ($app, $code, $ctdetect) = @{$unpackers->{$ct}}; | |
1035 | ||
1036 | if ($app) { | |
1037 | ||
776097f1 | 1038 | my $tmpdir = sprintf "%s/DIR_%08d", $self->{tmpdir}, ++$self->{udid}; |
425ef29c | 1039 | mkdir $tmpdir; |
776097f1 | 1040 | |
425ef29c DM |
1041 | print "Unpack: $filename\n\t($ct) with $app to $tmpdir\n" |
1042 | if $self->{debug}; | |
1043 | ||
1044 | if (my $res = &$code ($self, $app, $filename, $tmpdir, $level ? $size : 0, $size)) { | |
1045 | unlink $filename if $level; | |
1046 | ||
1047 | if ($ctdetect) { | |
1048 | $self->unpack_todo ($level + 1); | |
1049 | } else { | |
1050 | $self->unpack_dir ($tmpdir, $level); | |
1051 | } | |
1052 | } | |
1053 | } | |
1054 | } | |
1055 | } | |
1056 | } | |
1057 | ||
1058 | sub is_archive { | |
776097f1 | 1059 | my ($ct) = @_; |
425ef29c | 1060 | |
776097f1 | 1061 | return defined($decompressors->{$ct}) || defined($unpackers->{$ct}); |
425ef29c DM |
1062 | } |
1063 | ||
1064 | # unpack_archive | |
1065 | # | |
1066 | # Description: unpacks an archive and records containing | |
1067 | # content types (detected by magic numbers and file extension) | |
776097f1 DM |
1068 | # Extracted files are stored inside 'tempdir'. |
1069 | # | |
1359baef | 1070 | # returns: true if file is archive, undef otherwise |
425ef29c DM |
1071 | |
1072 | sub unpack_archive { | |
1073 | my ($self, $filename, $ct) = @_; | |
1074 | ||
776097f1 | 1075 | my $st = lstat($filename); |
425ef29c DM |
1076 | my $size = 0; |
1077 | ||
1078 | if (!$st) { | |
1079 | die "no such file '$filename' - $!"; | |
776097f1 | 1080 | } elsif (POSIX::S_ISREG($st->mode)) { |
425ef29c DM |
1081 | $size = $st->size; |
1082 | ||
1083 | return if !$size; # do nothing | |
1084 | ||
1085 | $self->{quota} = $self->{maxquota} - $self->{size}; | |
1086 | ||
1087 | $self->{ratioquota} = $size * $self->{maxratio} if $self->{maxratio}; | |
776097f1 | 1088 | |
425ef29c DM |
1089 | } else { |
1090 | return; # do nothing | |
1091 | } | |
776097f1 | 1092 | |
91a5af02 | 1093 | $ct = PMG::Utils::magic_mime_type_for_file($filename) if !$ct; |
425ef29c | 1094 | |
776097f1 | 1095 | return if (!$ct || !is_archive($ct)); # not an archive |
425ef29c DM |
1096 | |
1097 | eval { | |
776097f1 | 1098 | $self->__unpack_archive($filename, 0, $st->size, $ct); |
425ef29c DM |
1099 | }; |
1100 | ||
1101 | my $err = $@; | |
1102 | ||
776097f1 | 1103 | printf "ELAPSED: %.2f ms $filename\n", |
425ef29c DM |
1104 | int(tv_interval ($self->{starttime}) * 1000) |
1105 | if $self->{debug}; | |
1106 | ||
1107 | if ($err) { | |
776097f1 | 1108 | $self->{mime}->{'proxmox/unreadable-archive'} = 1; |
425ef29c DM |
1109 | die $err; |
1110 | } | |
1111 | return 1; | |
1112 | } | |
1113 | ||
1114 | 1; | |
1115 | __END__ |