]>
Commit | Line | Data |
---|---|---|
91a5af02 | 1 | package PMG::Unpack; |
425ef29c DM |
2 | |
3 | use strict; | |
91a5af02 | 4 | use warnings; |
425ef29c DM |
5 | use IO::File; |
6 | use IO::Select; | |
7 | use Xdgmime; | |
91a5af02 DM |
8 | use Compress::Zlib qw(gzopen); |
9 | use Compress::Bzip2 qw(bzopen); | |
425ef29c | 10 | use File::Path; |
91a5af02 | 11 | use File::Temp qw(tempdir); |
425ef29c DM |
12 | use File::Basename; |
13 | use File::stat; | |
14 | use POSIX ":sys_wait_h"; | |
91a5af02 | 15 | use Time::HiRes qw(usleep ualarm gettimeofday tv_interval); |
425ef29c DM |
16 | use Archive::Zip qw(:CONSTANTS :ERROR_CODES); |
17 | use LibArchive; | |
425ef29c DM |
18 | use MIME::Parser; |
19 | ||
91a5af02 | 20 | use PMG::Utils; |
18598b2c | 21 | use PMG::MIMEUtils; |
91a5af02 | 22 | |
425ef29c DM |
23 | my $unpackers = { |
24 | ||
25 | # TAR | |
26 | 'application/x-tar' => [ 'tar', \&unpack_tar, 1], | |
27 | #'application/x-tar' => [ 'tar', \&generic_unpack ], | |
28 | #'application/x-tar' => [ '7z', \&generic_unpack ], | |
29 | 'application/x-compressed-tar' => [ 'tar', \&unpack_tar, 1], | |
30 | ||
31 | # CPIO | |
32 | 'application/x-cpio' => [ 'cpio', \&unpack_tar, 1], | |
33 | #'application/x-cpio' => [ '7z', \&generic_unpack ], | |
34 | ||
35 | # ZIP | |
776097f1 DM |
36 | #'application/zip' => [ 'zip', \&unpack_tar, 1], |
37 | 'application/zip' => [ '7z', \&generic_unpack ], | |
425ef29c DM |
38 | |
39 | # 7z | |
40 | 'application/x-7z-compressed' => [ '7z', \&generic_unpack ], | |
41 | ||
42 | # RAR | |
776097f1 | 43 | 'application/vnd.rar' => [ '7z', \&generic_unpack ], |
425ef29c DM |
44 | |
45 | # ARJ | |
46 | 'application/x-arj' => [ '7z', \&generic_unpack ], | |
47 | ||
48 | # RPM | |
49 | 'application/x-rpm' => [ '7z', \&generic_unpack ], | |
50 | ||
51 | # DEB | |
776097f1 | 52 | 'application/vnd.debian.binary-package' => [ 'ar', \&unpack_tar, 1], |
425ef29c DM |
53 | |
54 | # MS CAB | |
55 | 'application/vnd.ms-cab-compressed' => [ '7z', \&generic_unpack ], | |
56 | ||
57 | # LZH/LHA | |
58 | 'application/x-lha' => [ '7z', \&generic_unpack ], | |
59 | ||
60 | # TNEF (winmail.dat) | |
61 | 'application/vnd.ms-tnef' => [ 'tnef', \&generic_unpack ], | |
62 | ||
63 | # message/rfc822 | |
64 | 'message/rfc822' => [ 'mime', \&unpack_mime ], | |
65 | ||
66 | ## CHM, Nsis - supported by 7z, but we currently do not | |
67 | ||
68 | ##'application/x-zoo' - old format - no support | |
69 | ##'application/x-ms-dos-executable' - exe should be blocked anyways - no support | |
70 | ## application/x-arc - old format - no support | |
71 | }; | |
72 | ||
73 | my $decompressors = { | |
776097f1 | 74 | 'application/gzip' => [ 'guzip', \&uncompress_file ], |
425ef29c DM |
75 | 'application/x-compress' => [ 'uncompress', \&uncompress_file ], |
76 | # 'application/x-compressed-tar' => [ 'guzip', \&uncompress_file ], # unpack_tar is faster | |
77 | 'application/x-tarz' => [ 'uncompress', \&uncompress_file ], | |
78 | 'application/x-bzip' => [ 'bunzip2', \&uncompress_file ], | |
79 | 'application/x-bzip-compressed-tar' => [ 'bunzip2', \&uncompress_file ], | |
425ef29c DM |
80 | }; |
81 | ||
82 | ||
83 | ## some helper methods | |
84 | ||
85 | sub min2 { | |
86 | return ( $_[0] < $_[1]) ? $_[0] : $_[1]; | |
87 | } | |
88 | ||
89 | sub max2 { | |
90 | return ( $_[0] > $_[1]) ? $_[0] : $_[1]; | |
91 | } | |
92 | ||
93 | # STDERR is redirected to STDOUT by default | |
94 | sub helper_pipe_open { | |
95 | my ($fh, $inputfilename, $errorfilename, @cmd) = @_; | |
96 | ||
97 | my $pid = $fh->open ('-|'); | |
776097f1 | 98 | |
425ef29c DM |
99 | die "unable to fork helper process: $!" if !defined $pid; |
100 | ||
101 | return $pid if ($pid != 0); # parent process simply returns | |
776097f1 | 102 | |
425ef29c DM |
103 | $inputfilename = '/dev/null' if !$inputfilename; |
104 | ||
1359baef | 105 | # same algorithm as used inside SA |
425ef29c DM |
106 | |
107 | my $fd = fileno (STDIN); | |
108 | close STDIN; | |
109 | POSIX::close(0) if $fd != 0; | |
110 | ||
111 | if (!open (STDIN, "<$inputfilename")) { | |
776097f1 DM |
112 | POSIX::_exit (1); |
113 | kill ('KILL', $$); | |
425ef29c DM |
114 | } |
115 | ||
116 | $errorfilename = '&STDOUT' if !$errorfilename; | |
117 | ||
118 | $fd = fileno(STDERR); | |
119 | close STDERR; | |
120 | POSIX::close(2) if $fd != 2; | |
121 | ||
122 | if (!open (STDERR, ">$errorfilename")) { | |
776097f1 DM |
123 | POSIX::_exit (1); |
124 | kill ('KILL', $$); | |
425ef29c DM |
125 | } |
126 | ||
127 | exec @cmd; | |
776097f1 | 128 | |
425ef29c DM |
129 | warn "exec failed"; |
130 | ||
776097f1 DM |
131 | POSIX::_exit (1); |
132 | kill('KILL', $$); | |
425ef29c DM |
133 | die; # else -w complains |
134 | } | |
135 | ||
136 | sub helper_pipe_consume { | |
137 | my ($cfh, $pid, $timeout, $bufsize, $callback) = @_; | |
138 | ||
139 | eval { | |
140 | run_with_timeout ($timeout, sub { | |
141 | if ($bufsize) { | |
142 | my $buf; | |
143 | my $count; | |
144 | ||
145 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
146 | &$callback ($buf, $count); | |
147 | } | |
148 | die "pipe read failed" if ($count < 0); | |
149 | ||
150 | } else { | |
151 | while (my $line = <$cfh>) { | |
152 | &$callback ($line); | |
153 | } | |
154 | } | |
155 | }); | |
156 | }; | |
157 | ||
158 | my $err = $@; | |
776097f1 | 159 | |
425ef29c DM |
160 | # send TERM first if process still exits |
161 | if ($err) { | |
162 | kill (15, $pid) if kill (0, $pid); | |
163 | ||
164 | # read remaining data, if any | |
165 | my ($count, $buf); | |
166 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
167 | # do nothing | |
168 | } | |
169 | } | |
776097f1 | 170 | |
425ef29c DM |
171 | # then close pipe |
172 | my $closeerr; | |
173 | close ($cfh) || ($closeerr = $!); | |
174 | my $childstat = $?; | |
175 | ||
176 | # still alive ? | |
177 | if (kill (0, $pid)) { | |
178 | sleep (1); | |
179 | kill (9, $pid); # terminate process | |
180 | die "child '$pid' termination problems\n"; | |
181 | } | |
182 | ||
183 | die $err if $err; | |
184 | ||
185 | die "child '$pid' close failed - $closeerr\n" if $closeerr; | |
186 | ||
187 | die "child '$pid' failed: $childstat\n" if $childstat; | |
188 | } | |
189 | ||
190 | sub run_with_timeout { | |
191 | my ($timeout, $code, @param) = @_; | |
192 | ||
193 | die "got timeout\n" if $timeout <= 0; | |
194 | ||
195 | my $prev_alarm; | |
196 | ||
197 | my $sigcount = 0; | |
198 | ||
199 | my $res; | |
200 | ||
201 | eval { | |
202 | local $SIG{ALRM} = sub { $sigcount++; die "got timeout\n"; }; | |
203 | local $SIG{PIPE} = sub { $sigcount++; die "broken pipe\n" }; | |
204 | local $SIG{__DIE__}; # see SA bug 4631 | |
205 | ||
206 | $prev_alarm = alarm ($timeout); | |
207 | ||
208 | $res = &$code (@param); | |
209 | ||
210 | alarm 0; # avoid race conditions | |
211 | }; | |
212 | ||
213 | my $err = $@; | |
776097f1 | 214 | |
425ef29c DM |
215 | alarm ($prev_alarm) if defined ($prev_alarm); |
216 | ||
217 | die "unknown error" if $sigcount && !$err; # seems to happen sometimes | |
218 | ||
219 | die $err if $err; | |
220 | ||
221 | return $res; | |
222 | } | |
223 | ||
224 | # the unpacker object constructor | |
225 | ||
226 | sub new { | |
227 | my ($type, %param) = @_; | |
228 | ||
229 | my $self = {}; | |
230 | bless $self, $type; | |
231 | ||
232 | $self->{tmpdir} = $param{tmpdir} || tempdir (CLEANUP => 1); | |
233 | $self->{starttime} = [gettimeofday]; | |
234 | $self->{timeout} = $param{timeout} || 3600*24; | |
235 | ||
236 | # maxfiles: 0 = disabled | |
237 | $self->{maxfiles} = defined ($param{maxfiles}) ? $param{maxfiles} : 1000; | |
238 | ||
239 | $param{maxrec} = 0 if !defined ($param{maxrec}); | |
240 | if ($param{maxrec} < 0) { | |
241 | $param{maxrec} = - $param{maxrec}; | |
242 | $self->{maxrec_soft} = 1; # do not die when limit reached | |
243 | } | |
244 | ||
245 | $self->{maxrec} = $param{maxrec} || 8; # 0 = disabled | |
246 | $self->{maxratio} = $param{maxratio} || 0; # 0 = disabled | |
247 | ||
776097f1 | 248 | $self->{maxquota} = $param{quota} || 250*1024*1024; # 250 MB |
425ef29c DM |
249 | |
250 | $self->{ctonly} = $param{ctonly}; # only detect contained content types | |
251 | ||
252 | # internal | |
776097f1 | 253 | $self->{quota} = 0; |
425ef29c DM |
254 | $self->{ratioquota} = 0; |
255 | $self->{size} = 0; | |
256 | $self->{files} = 0; | |
257 | $self->{levels} = 0; | |
258 | ||
259 | $self->{debug} = $param{debug} || 0; | |
260 | ||
261 | $self->{mime} = {}; | |
29d30ef8 | 262 | $self->{filenames} = {}; |
425ef29c | 263 | |
1359baef TL |
264 | $self->{ufid} = 0; # counter to create unique file names |
265 | $self->{udid} = 0; # counter to create unique dir names | |
266 | $self->{ulid} = 0; # counter to create unique link names | |
425ef29c DM |
267 | |
268 | $self->{todo} = []; | |
269 | ||
270 | return $self; | |
271 | } | |
272 | ||
273 | sub cleanup { | |
274 | my $self = shift; | |
275 | ||
276 | if ($self->{debug}) { | |
277 | system ("find '$self->{tmpdir}'"); | |
278 | } | |
279 | ||
280 | rmtree ($self->{tmpdir}); | |
281 | } | |
282 | ||
283 | sub DESTROY { | |
284 | my $self = shift; | |
285 | ||
286 | rmtree ($self->{tmpdir}); | |
287 | } | |
288 | ||
289 | ||
290 | sub uncompress_file { | |
291 | my ($self, $app, $filename, $newname, $csize, $filesize) = @_; | |
292 | ||
293 | my $timeout = $self->check_timeout(); | |
294 | ||
295 | my $maxsize = $self->{quota} - $self->{size}; | |
296 | ||
297 | if ($self->{maxratio}) { | |
298 | $maxsize = min2 ($maxsize, $filesize * $self->{maxratio}); | |
299 | } | |
300 | ||
301 | $self->add_glob_mime_type ($newname); | |
302 | ||
303 | my $outfd; | |
304 | ||
305 | my $usize = 0; | |
306 | my $err; | |
307 | my $ct; | |
308 | my $todo = 1; | |
309 | ||
310 | if ($app eq 'guzip' || $app eq 'bunzip2') { | |
311 | ||
312 | my $cfh; | |
776097f1 | 313 | |
425ef29c DM |
314 | eval { |
315 | ||
316 | # bzip provides a gz compatible interface | |
776097f1 | 317 | if ($app eq 'bunzip2') { |
425ef29c DM |
318 | $self->{mime}->{'application/x-bzip'} = 1; |
319 | $cfh = bzopen ("$filename", 'r'); | |
320 | die "bzopen '$filename' failed" if !$cfh; | |
321 | } else { | |
776097f1 | 322 | $self->{mime}->{'application/gzip'} = 1; |
425ef29c DM |
323 | $cfh = gzopen ("$filename", 'rb'); |
324 | die "gzopen '$filename' failed" if !$cfh; | |
325 | } | |
776097f1 | 326 | |
425ef29c DM |
327 | run_with_timeout ($timeout, sub { |
328 | my $count; | |
329 | my $buf; | |
330 | while (($count = $cfh->gzread ($buf, 128*1024)) > 0) { | |
331 | ||
332 | if (!$usize) { | |
333 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count); | |
334 | ||
335 | $usize += $count; | |
336 | $self->{mime}->{$ct} = 1; | |
337 | ||
338 | if (!is_archive ($ct)) { | |
339 | $todo = 0; | |
340 | ||
341 | # warning: this can lead to wrong size/quota test | |
342 | last if $self->{ctonly}; | |
343 | } | |
344 | } else { | |
345 | $usize += $count; | |
346 | } | |
347 | ||
348 | $self->check_comp_ratio ($filesize, $usize); | |
349 | ||
350 | $self->check_quota (1, $usize, $csize); | |
351 | ||
352 | if (!$outfd) { | |
353 | $outfd = IO::File->new; | |
354 | ||
355 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
356 | die "unable to create file $newname: $!"; | |
357 | } | |
358 | } | |
359 | ||
360 | if (!$outfd->print ($buf)) { | |
361 | die "unable to write '$newname' - $!"; | |
362 | } | |
363 | } | |
364 | if ($count < 0) { | |
365 | die "gzread failed"; | |
366 | } | |
367 | }); | |
368 | }; | |
369 | ||
370 | $err = $@; | |
371 | ||
372 | $cfh->gzclose(); | |
373 | ||
374 | } elsif ($app eq 'uncompress') { | |
375 | ||
376 | $self->{mime}->{'application/x-compress'} = 1; | |
377 | ||
378 | eval { | |
379 | my @cmd = ('/bin/gunzip', '-c', $filename); | |
380 | my $cfh = IO::File->new(); | |
381 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @cmd); | |
382 | ||
383 | helper_pipe_consume ($cfh, $pid, $timeout, 128*1024, sub { | |
384 | my ($buf, $count) = @_; | |
385 | ||
386 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count) if (!$usize); | |
387 | ||
388 | $usize += $count; | |
389 | ||
390 | $self->check_comp_ratio ($filesize, $usize); | |
391 | ||
392 | $self->check_quota (1, $usize, $csize); | |
393 | ||
394 | if (!$outfd) { | |
395 | $outfd = IO::File->new; | |
396 | ||
397 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
398 | die "unable to create file $newname: $!"; | |
399 | } | |
400 | } | |
401 | ||
402 | if (!$outfd->print ($buf)) { | |
403 | die "unable to write '$newname' - $!"; | |
404 | } | |
405 | ||
406 | }); | |
407 | }; | |
776097f1 | 408 | |
425ef29c DM |
409 | $err = $@; |
410 | } | |
411 | ||
412 | $outfd->close () if $outfd; | |
413 | ||
414 | if ($err) { | |
415 | unlink $newname; | |
416 | die $err; | |
417 | } | |
418 | ||
419 | $self->check_quota (1, $usize, $csize, 1); | |
420 | ||
421 | $self->todo_list_add ($newname, $ct, $usize); | |
422 | ||
423 | return $newname; | |
424 | }; | |
425 | ||
426 | # calculate real filesystem space (needed by ext3 to store files/dirs) | |
427 | sub realsize { | |
428 | my ($size, $isdir) = @_; | |
429 | ||
430 | my $bs = 4096; # ext3 block size | |
431 | ||
432 | $size = max2 ($size, $bs) if $isdir; # dirs needs at least one block | |
433 | ||
434 | return int (($size + $bs - 1) / $bs) * $bs; # round up to block size | |
435 | } | |
436 | ||
437 | sub todo_list_add { | |
438 | my ($self, $filename, $ct, $size) = @_; | |
439 | ||
440 | if ($ct) { | |
441 | $self->{mime}->{$ct} = 1; | |
442 | if (is_archive ($ct)) { | |
443 | push @{$self->{todo}}, [$filename, $ct, $size]; | |
444 | } | |
445 | } | |
446 | } | |
447 | ||
448 | sub check_timeout { | |
449 | my ($self) = @_; | |
776097f1 | 450 | |
425ef29c DM |
451 | my $elapsed = int (tv_interval ($self->{starttime})); |
452 | my $timeout = $self->{timeout} - $elapsed; | |
453 | ||
454 | die "got timeout\n" if $timeout <= 0; | |
455 | ||
456 | return $timeout; | |
457 | } | |
458 | ||
459 | sub check_comp_ratio { | |
460 | my ($self, $compsize, $usize) = @_; | |
461 | ||
462 | return if !$compsize || !$self->{maxratio}; | |
463 | ||
464 | my $ratio = $usize/$compsize; | |
465 | ||
1359baef | 466 | die "compression ratio too large (> $self->{maxratio})" |
425ef29c DM |
467 | if $ratio > $self->{maxratio}; |
468 | } | |
469 | ||
470 | sub check_quota { | |
471 | my ($self, $files, $size, $csize, $commit) = @_; | |
472 | ||
473 | my $sizediff = $csize ? $size - $csize : $size; | |
474 | ||
1359baef | 475 | die "compression ratio too large (> $self->{maxratio})" |
425ef29c DM |
476 | if $self->{maxratio} && (($self->{size} + $sizediff) > $self->{ratioquota}); |
477 | ||
776097f1 | 478 | die "archive too large (> $self->{quota})" |
425ef29c DM |
479 | if ($self->{size} + $sizediff) > $self->{quota}; |
480 | ||
481 | die "unexpected number of files '$files'" if $files <= 0; | |
482 | ||
483 | $files-- if ($csize); | |
484 | ||
485 | die "too many files in archive (> $self->{maxfiles})" | |
486 | if $self->{maxfiles} && (($self->{files} + $files) > $self->{maxfiles}); | |
487 | ||
488 | if ($commit) { | |
489 | $self->{files} += $files; | |
490 | $self->{size} += $sizediff; | |
491 | } | |
776097f1 | 492 | |
425ef29c DM |
493 | } |
494 | ||
495 | sub add_glob_mime_type { | |
496 | my ($self, $filename) = @_; | |
497 | ||
29d30ef8 DC |
498 | my $basename = basename($filename); |
499 | $self->{filenames}->{$basename} = 1; | |
500 | ||
501 | if (my $ct = xdg_mime_get_mime_type_from_file_name($basename)) { | |
425ef29c DM |
502 | $self->{mime}->{$ct} = 1 if $ct ne 'application/octet-stream'; |
503 | } | |
504 | } | |
505 | ||
425ef29c DM |
506 | sub unpack_mime { |
507 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
508 | ||
509 | my $size = 0; | |
510 | my $files = 0; | |
511 | ||
512 | my $timeout = $self->check_timeout(); | |
513 | ||
514 | eval { | |
515 | run_with_timeout ($timeout, sub { | |
776097f1 | 516 | |
425ef29c | 517 | # Create a new MIME parser: |
18598b2c | 518 | my $max; |
425ef29c | 519 | if ($self->{maxfiles}) { |
18598b2c | 520 | $max = $self->{maxfiles} - $self->{files}; |
425ef29c DM |
521 | } |
522 | ||
18598b2c DC |
523 | my $parser = PMG::MIMEUtils::new_mime_parser({ |
524 | dumpdir => $tmpdir, | |
525 | nested => 1, | |
526 | ignore_errors => 1, | |
527 | extract_uuencode => 1, | |
528 | ignore_filename => 1, | |
529 | maxfiles => $max, | |
530 | }, 1); | |
531 | ||
425ef29c DM |
532 | my $entity = $parser->parse_open ($filename); |
533 | ||
18598b2c DC |
534 | PMG::MIMEUtils::traverse_mime_parts($entity, sub { |
535 | my ($part) = @_; | |
536 | my $ct = $part->head->mime_attr('content-type'); | |
537 | $self->{mime}->{$ct} = 1 if $ct && length($ct) < 256; | |
776097f1 | 538 | |
18598b2c DC |
539 | if (my $body = $part->bodyhandle) { |
540 | my $path = $body->path; | |
541 | $size += -s $path; | |
542 | $files++; | |
543 | } | |
544 | }); | |
425ef29c DM |
545 | }); |
546 | }; | |
547 | ||
548 | my $err = $@; | |
549 | ||
550 | die $err if $err; | |
551 | ||
552 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
553 | ||
554 | return 1; | |
555 | ||
556 | } | |
557 | ||
558 | sub unpack_zip { | |
559 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
560 | ||
561 | my $size = 0; | |
562 | my $files = 0; | |
563 | ||
564 | my $timeout = $self->check_timeout(); | |
565 | ||
566 | eval { | |
567 | ||
568 | my $zip = Archive::Zip->new (); | |
569 | ||
570 | Archive::Zip::setErrorHandler (sub { die @_ }); | |
571 | ||
572 | run_with_timeout ($timeout, sub { | |
573 | ||
574 | my $status = $zip->read ($filename); | |
575 | die "unable to open zip file '$filename'" if $status != AZ_OK; | |
776097f1 | 576 | |
425ef29c DM |
577 | my $tid = 1; |
578 | foreach my $mem ($zip->members) { | |
579 | ||
580 | $files++; | |
581 | ||
582 | my $cm = $mem->compressionMethod(); | |
583 | die "unsupported zip compression method '$cm'\n" | |
776097f1 | 584 | if !(($cm == COMPRESSION_DEFLATED || |
425ef29c DM |
585 | $cm == COMPRESSION_STORED)); |
586 | ||
587 | die "encrypted archive detected\n" | |
588 | if $mem->isEncrypted(); | |
589 | ||
590 | my $us = $mem->uncompressedSize(); | |
591 | ||
592 | next if $us <= 0; # skip zero size files | |
593 | ||
594 | if ($mem->isDirectory) { | |
595 | $size += realsize ($us, 1); | |
596 | } else { | |
597 | $size += realsize ($us); | |
598 | } | |
599 | ||
600 | $self->check_comp_ratio ($filesize, $size); | |
601 | ||
602 | $self->check_quota ($files, $size, $csize); | |
603 | ||
604 | next if $mem->isDirectory; # skip dirs | |
605 | ||
606 | my $name = basename ($mem->fileName()); | |
607 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
608 | my $newfn = sprintf "$tmpdir/Z%08d_$name", $tid++; | |
609 | ||
610 | $self->add_glob_mime_type ($name); | |
611 | ||
612 | my $outfd = IO::File->new; | |
613 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
776097f1 | 614 | die "unable to create file $newfn: $!"; |
425ef29c DM |
615 | } |
616 | ||
617 | my $ct; | |
618 | ||
619 | eval { | |
620 | ||
621 | $mem->desiredCompressionMethod (COMPRESSION_STORED); | |
622 | ||
623 | $status = $mem->rewindData(); | |
624 | ||
625 | die "unable to rewind zip stream" if $status != AZ_OK; | |
626 | ||
627 | my $outRef; | |
628 | my $bytes = 0; | |
629 | while ($status == AZ_OK) { | |
630 | ($outRef, $status) = $mem->readChunk(); | |
776097f1 | 631 | die "unable to read zip member" |
425ef29c DM |
632 | if ($status != AZ_OK && $status != AZ_STREAM_END); |
633 | ||
634 | my $len = length ($$outRef); | |
635 | if ($len > 0) { | |
636 | $ct = xdg_mime_get_mime_type_for_data ($$outRef, $len) if (!$bytes); | |
637 | $outfd->print ($$outRef) || die "write error during zip copy"; | |
638 | $bytes += $len; | |
639 | } | |
640 | ||
641 | last if $status == AZ_STREAM_END; | |
642 | } | |
643 | ||
776097f1 | 644 | $mem->endRead(); |
425ef29c DM |
645 | |
646 | $self->todo_list_add ($newfn, $ct, $bytes); | |
647 | ||
648 | }; | |
649 | ||
650 | my $err = $@; | |
651 | ||
652 | $outfd->close (); | |
653 | ||
654 | if ($err) { | |
655 | unlink $newfn; | |
656 | die $err; | |
657 | } | |
658 | } | |
659 | }); | |
660 | }; | |
661 | ||
662 | my $err = $@; | |
663 | ||
664 | die $err if $err; | |
665 | ||
666 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
667 | ||
668 | return 1; | |
669 | } | |
670 | ||
671 | sub unpack_tar { | |
672 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
673 | ||
674 | my $size = 0; | |
675 | my $files = 0; | |
676 | ||
677 | my $timeout = $self->check_timeout(); | |
678 | ||
679 | my $a = LibArchive::archive_read_new(); | |
680 | ||
681 | die "unable to create LibArchive object" if !$a; | |
682 | ||
683 | LibArchive::archive_read_support_format_all ($a); | |
2e08e3c9 | 684 | LibArchive::archive_read_support_filter_all ($a); |
425ef29c DM |
685 | |
686 | eval { | |
687 | run_with_timeout ($timeout, sub { | |
688 | ||
689 | if ((my $r = LibArchive::archive_read_open_filename ($a, $filename, 10240))) { | |
776097f1 | 690 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); |
425ef29c DM |
691 | } |
692 | my $tid = 1; | |
693 | for (;;) { | |
694 | my $entry; | |
695 | my $r = LibArchive::archive_read_next_header ($a, $entry); | |
696 | ||
697 | last if ($r == LibArchive::ARCHIVE_EOF); | |
698 | ||
699 | if ($r != LibArchive::ARCHIVE_OK) { | |
700 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); | |
701 | } | |
702 | ||
703 | my $us = LibArchive::archive_entry_size ($entry); | |
704 | my $mode = LibArchive::archive_entry_mode ($entry); | |
705 | ||
706 | my $rs; | |
707 | if (POSIX::S_ISREG ($mode)) { | |
708 | $rs = realsize ($us); | |
709 | } else { | |
710 | $rs = POSIX::S_ISDIR ($mode) ? realsize ($us, 1) : 256; | |
711 | } | |
712 | $size += $rs; | |
713 | $files += 1; | |
714 | ||
715 | $self->check_comp_ratio ($filesize, $size); | |
716 | ||
717 | $self->check_quota ($files, $size, $csize); | |
718 | ||
719 | next if POSIX::S_ISDIR ($mode); | |
720 | next if !POSIX::S_ISREG ($mode); | |
721 | ||
722 | my $name = basename (LibArchive::archive_entry_pathname ($entry)); | |
723 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
724 | my $newfn = sprintf "$tmpdir/A%08d_$name", $tid++; | |
725 | ||
726 | $self->add_glob_mime_type ($name); | |
727 | ||
728 | my $outfd; | |
729 | ||
730 | eval { | |
731 | my $bytes = 0; | |
732 | my $ct; | |
733 | my $todo = 1; | |
734 | ||
735 | if ($us > 0) { | |
736 | my $len; | |
737 | my $buf; | |
738 | while (($len = LibArchive::archive_read_data($a, $buf, 128*1024)) > 0) { | |
776097f1 | 739 | |
425ef29c DM |
740 | if (!$bytes) { |
741 | if ($ct = xdg_mime_get_mime_type_for_data ($buf, $len)) { | |
742 | $self->{mime}->{$ct} = 1; | |
743 | ||
744 | if (!is_archive ($ct)) { | |
745 | $todo = 0; | |
746 | last if $self->{ctonly}; | |
747 | } | |
748 | } | |
749 | } | |
776097f1 | 750 | |
425ef29c DM |
751 | $bytes += $len; |
752 | ||
753 | if (!$outfd) { # create only when needed | |
754 | $outfd = IO::File->new; | |
776097f1 | 755 | |
425ef29c DM |
756 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { |
757 | die "unable to create file $newfn: $!"; | |
758 | } | |
759 | } | |
760 | ||
761 | if (!$outfd->print ($buf)) { | |
762 | die "unable to write '$newfn' - $!"; | |
763 | } | |
764 | } | |
765 | ||
766 | die ("error reading archive (encrypted)\n") | |
767 | if ($len < 0); | |
768 | } | |
769 | ||
770 | $self->todo_list_add ($newfn, $ct, $bytes) if $todo; | |
771 | }; | |
772 | ||
773 | my $err = $@; | |
774 | ||
775 | $outfd->close () if $outfd; | |
776097f1 | 776 | |
425ef29c DM |
777 | if ($err) { |
778 | unlink $newfn; | |
779 | die $err; | |
780 | } | |
776097f1 | 781 | } |
425ef29c DM |
782 | }); |
783 | }; | |
784 | ||
785 | my $err = $@; | |
786 | ||
787 | LibArchive::archive_read_close($a); | |
2e08e3c9 | 788 | LibArchive::archive_read_free($a); |
776097f1 | 789 | |
425ef29c DM |
790 | die $err if $err; |
791 | ||
792 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
793 | ||
794 | return 1; | |
795 | } | |
796 | ||
797 | sub generic_unpack { | |
798 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
799 | ||
800 | my $size = 0; | |
801 | my $files = 0; | |
802 | ||
803 | my $timeout = $self->check_timeout(); | |
804 | ||
805 | my @listcmd; | |
806 | my @restorecmd = ('/bin/false'); | |
807 | ||
776097f1 | 808 | my $filter; |
425ef29c DM |
809 | |
810 | if ($app eq 'tar') { | |
811 | @listcmd = ('/bin/tar', '-tvf', $filename); | |
812 | @restorecmd = ('/bin/tar', '-x', '--backup=number', "--transform='s,[^A-Za-z0-9\./],-,g'", '-o', | |
813 | '-m', '-C', $tmpdir, '-f', $filename); | |
814 | $filter = sub { | |
815 | my $line = shift; | |
816 | if ($line =~ m/^(\S)\S+\s+\S+\s+([\d,\.]+)\s+\S+/) { | |
817 | my ($type, $bytes) = ($1, $2); | |
818 | $bytes =~ s/[,\.]//g; | |
819 | ||
820 | if ($type eq 'd') { | |
821 | $bytes = realsize ($bytes, 1); | |
822 | } elsif ($type eq '-') { | |
823 | $bytes = realsize ($bytes); | |
824 | } else { | |
825 | $bytes = 256; # simple assumption | |
826 | } | |
827 | ||
828 | $size += $bytes; | |
829 | $files++; | |
830 | ||
831 | $self->check_comp_ratio ($filesize, $size); | |
832 | $self->check_quota ($files, $size, $csize); | |
833 | ||
834 | } else { | |
835 | die "can't parse tar output: $line\n"; | |
836 | } | |
837 | } | |
838 | } elsif ($app eq '7z' || $app eq '7zsimple') { | |
839 | # Note: set password to 'none' with '-pnone', to avoid reading from /dev/tty | |
840 | @restorecmd = ('/usr/bin/7z', 'e', '-pnone', '-bd', '-y', '-aou', "-w$self->{tmpdir}", "-o$tmpdir", $filename); | |
841 | ||
842 | @listcmd = ('/usr/bin/7z', 'l', '-slt', $filename); | |
843 | ||
844 | my ($path, $folder, $bytes); | |
845 | ||
846 | $filter = sub { | |
847 | my $line = shift; | |
848 | chomp $line; | |
849 | ||
850 | if ($line =~ m/^\s*\z/) { | |
851 | if (defined ($path) && defined ($bytes)) { | |
852 | $bytes = realsize ($bytes, $folder); | |
853 | $size += $bytes; | |
854 | $files++; | |
855 | ||
856 | $self->check_comp_ratio ($filesize, $size); | |
857 | $self->check_quota ($files, $size, $csize); | |
858 | } | |
859 | undef $path; | |
860 | undef $folder; | |
861 | undef $bytes; | |
776097f1 | 862 | |
425ef29c DM |
863 | } elsif ($line =~ m/^Path = (.*)\z/s) { |
864 | $path = $1; | |
865 | } elsif ($line =~ m/^Size = (\d+)\z/s) { | |
866 | $bytes = $1; | |
867 | } elsif ($line =~ m/^Folder = (\d+)\z/s) { | |
868 | $folder = $1; | |
869 | } elsif ($line =~ m/^Attributes = ([D\.][R\.][H\.][S\.][A\.])\z/s) { | |
870 | $folder = 1 if $1 && substr ($1, 0, 1) eq 'D'; | |
871 | } | |
872 | }; | |
873 | ||
874 | } elsif ($app eq 'tnef') { | |
875 | @listcmd = ('/usr/bin/tnef', '-tv', '-f', $filename); | |
876 | @restorecmd = ('/usr/bin/tnef', '-C', $tmpdir, '--number-backups', '-f', $filename); | |
877 | ||
878 | $filter = sub { | |
879 | my $line = shift; | |
880 | chomp $line; | |
881 | ||
882 | if ($line =~ m!^\s*(\d+)\s*|\s*\d{4}/\d{1,2}/\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2}\s*|!) { | |
883 | my $bytes = $1; | |
884 | ||
885 | $bytes = realsize ($bytes); | |
886 | $size += $bytes; | |
887 | $files++; | |
888 | ||
889 | $self->check_comp_ratio ($filesize, $size); | |
890 | $self->check_quota ($files, $size, $csize); | |
891 | } else { | |
892 | die "can't parse tnef output\n"; | |
893 | } | |
894 | ||
895 | }; | |
896 | ||
897 | } else { | |
898 | die "unknown application '$app'"; | |
899 | } | |
900 | ||
901 | eval { | |
902 | ||
903 | my $cfh = IO::File->new(); | |
904 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @listcmd); | |
776097f1 | 905 | |
425ef29c DM |
906 | helper_pipe_consume ($cfh, $pid, $timeout, 0, $filter); |
907 | }; | |
908 | ||
909 | my $err = $@; | |
910 | ||
911 | die $err if $err; | |
912 | ||
913 | return if !$files; # empty archive | |
914 | ||
915 | $self->check_quota ($files, $size, $csize, 1); | |
916 | ||
917 | $timeout = $self->check_timeout(); | |
918 | ||
919 | my $cfh = IO::File->new(); | |
920 | my $pid = helper_pipe_open ($cfh, '/dev/null', undef, @restorecmd); | |
921 | helper_pipe_consume ($cfh, $pid, $timeout, 0, sub { | |
922 | my $line = shift; | |
923 | print "$app: $line" if $self->{debug}; | |
924 | }); | |
925 | ||
926 | return 1; | |
927 | } | |
928 | ||
929 | sub unpack_dir { | |
930 | my ($self, $dirname, $level) = @_; | |
931 | ||
932 | local (*DIR); | |
933 | ||
934 | print "unpack dir '$dirname'\n" if $self->{debug}; | |
935 | ||
936 | opendir(DIR, $dirname) || die "can't opendir $dirname: $!"; | |
937 | ||
938 | my $name; | |
939 | ||
776097f1 | 940 | while (defined ($name = readdir (DIR))) { |
425ef29c DM |
941 | my $path = "$dirname/$name"; |
942 | my $st = lstat ($path); | |
943 | ||
944 | if (!$st) { | |
945 | die "no such file '$path' - $!"; | |
946 | } elsif (POSIX::S_ISDIR ($st->mode)) { | |
947 | next if ($name eq '.' || $name eq '..'); | |
948 | $self->unpack_dir ($path, $level); | |
949 | } elsif (POSIX::S_ISREG ($st->mode)) { | |
950 | my $size = $st->size; | |
951 | $self->__unpack_archive ($path, $level + 1, $size); | |
952 | } | |
953 | } | |
954 | ||
955 | closedir DIR; | |
956 | } | |
957 | ||
958 | sub unpack_todo { | |
959 | my ($self, $level) = @_; | |
960 | ||
961 | my $ta = $self->{todo}; | |
962 | $self->{todo} = []; | |
963 | ||
964 | foreach my $todo (@$ta) { | |
965 | $self->__unpack_archive ($todo->[0], $level, $todo->[2], $todo->[1]); | |
966 | } | |
967 | } | |
968 | ||
969 | sub __unpack_archive { | |
970 | my ($self, $filename, $level, $size, $ct) = @_; | |
971 | ||
972 | $level = 0 if !$level; | |
973 | ||
776097f1 | 974 | $self->{levels} = max2($self->{levels}, $level); |
425ef29c DM |
975 | |
976 | if ($self->{maxrec} && ($level >= $self->{maxrec})) { | |
776097f1 | 977 | return if $self->{maxrec_soft}; |
425ef29c DM |
978 | die "max recursion limit reached\n"; |
979 | } | |
980 | ||
981 | die "undefined file size" if !defined ($size); | |
982 | ||
983 | return if !$size; # nothing to do | |
984 | ||
985 | if (!$ct) { | |
91a5af02 DM |
986 | $ct = PMG::Utils::magic_mime_type_for_file($filename); |
987 | $self->add_glob_mime_type($filename); | |
425ef29c DM |
988 | } |
989 | ||
990 | if ($ct) { | |
991 | $self->{mime}->{$ct} = 1; | |
992 | ||
776097f1 | 993 | if (defined($decompressors->{$ct})) { |
425ef29c DM |
994 | |
995 | my ($app, $code) = @{$decompressors->{$ct}}; | |
996 | ||
997 | if ($app) { | |
998 | ||
999 | # we try to keep extension correctly | |
776097f1 DM |
1000 | my $tmp = basename($filename); |
1001 | ($ct eq 'application/gzip') && | |
425ef29c | 1002 | $tmp =~ s/\.gz\z//; |
776097f1 | 1003 | ($ct eq 'application/x-bzip') && |
425ef29c | 1004 | $tmp =~ s/\.bz2?\z//; |
776097f1 | 1005 | ($ct eq 'application/x-compress') && |
425ef29c | 1006 | $tmp =~ s/\.Z\z//; |
776097f1 | 1007 | ($ct eq 'application/x-compressed-tar') && |
425ef29c | 1008 | $tmp =~ s/\.gz\z// || $tmp =~ s/\.tgz\z/.tar/; |
776097f1 | 1009 | ($ct eq 'application/x-bzip-compressed-tar') && |
425ef29c DM |
1010 | $tmp =~ s/\.bz2?\z// || $tmp =~ s/\.tbz\z/.tar/; |
1011 | ($ct eq 'application/x-tarz') && | |
1012 | $tmp =~ s/\.Z\z//; | |
1013 | ||
776097f1 | 1014 | my $newname = sprintf "%s/DC_%08d_%s", $self->{tmpdir}, ++$self->{ufid}, $tmp; |
425ef29c DM |
1015 | |
1016 | print "Decomp: $filename\n\t($ct) with $app to $newname\n" | |
776097f1 | 1017 | if $self->{debug}; |
425ef29c | 1018 | |
776097f1 | 1019 | if (my $res = &$code($self, $app, $filename, $newname, $level ? $size : 0, $size)) { |
425ef29c DM |
1020 | unlink $filename if $level; |
1021 | $self->unpack_todo ($level + 1); | |
1022 | } | |
1023 | } | |
1024 | } elsif (defined ($unpackers->{$ct})) { | |
1025 | ||
1026 | my ($app, $code, $ctdetect) = @{$unpackers->{$ct}}; | |
1027 | ||
1028 | if ($app) { | |
1029 | ||
776097f1 | 1030 | my $tmpdir = sprintf "%s/DIR_%08d", $self->{tmpdir}, ++$self->{udid}; |
425ef29c | 1031 | mkdir $tmpdir; |
776097f1 | 1032 | |
425ef29c DM |
1033 | print "Unpack: $filename\n\t($ct) with $app to $tmpdir\n" |
1034 | if $self->{debug}; | |
1035 | ||
1036 | if (my $res = &$code ($self, $app, $filename, $tmpdir, $level ? $size : 0, $size)) { | |
1037 | unlink $filename if $level; | |
1038 | ||
1039 | if ($ctdetect) { | |
1040 | $self->unpack_todo ($level + 1); | |
1041 | } else { | |
1042 | $self->unpack_dir ($tmpdir, $level); | |
1043 | } | |
1044 | } | |
1045 | } | |
1046 | } | |
1047 | } | |
1048 | } | |
1049 | ||
1050 | sub is_archive { | |
776097f1 | 1051 | my ($ct) = @_; |
425ef29c | 1052 | |
776097f1 | 1053 | return defined($decompressors->{$ct}) || defined($unpackers->{$ct}); |
425ef29c DM |
1054 | } |
1055 | ||
1056 | # unpack_archive | |
1057 | # | |
1058 | # Description: unpacks an archive and records containing | |
1059 | # content types (detected by magic numbers and file extension) | |
776097f1 DM |
1060 | # Extracted files are stored inside 'tempdir'. |
1061 | # | |
1359baef | 1062 | # returns: true if file is archive, undef otherwise |
425ef29c DM |
1063 | |
1064 | sub unpack_archive { | |
1065 | my ($self, $filename, $ct) = @_; | |
1066 | ||
776097f1 | 1067 | my $st = lstat($filename); |
425ef29c DM |
1068 | my $size = 0; |
1069 | ||
1070 | if (!$st) { | |
1071 | die "no such file '$filename' - $!"; | |
776097f1 | 1072 | } elsif (POSIX::S_ISREG($st->mode)) { |
425ef29c DM |
1073 | $size = $st->size; |
1074 | ||
1075 | return if !$size; # do nothing | |
1076 | ||
1077 | $self->{quota} = $self->{maxquota} - $self->{size}; | |
1078 | ||
1079 | $self->{ratioquota} = $size * $self->{maxratio} if $self->{maxratio}; | |
776097f1 | 1080 | |
425ef29c DM |
1081 | } else { |
1082 | return; # do nothing | |
1083 | } | |
776097f1 | 1084 | |
91a5af02 | 1085 | $ct = PMG::Utils::magic_mime_type_for_file($filename) if !$ct; |
425ef29c | 1086 | |
776097f1 | 1087 | return if (!$ct || !is_archive($ct)); # not an archive |
425ef29c DM |
1088 | |
1089 | eval { | |
776097f1 | 1090 | $self->__unpack_archive($filename, 0, $st->size, $ct); |
425ef29c DM |
1091 | }; |
1092 | ||
1093 | my $err = $@; | |
1094 | ||
776097f1 | 1095 | printf "ELAPSED: %.2f ms $filename\n", |
425ef29c DM |
1096 | int(tv_interval ($self->{starttime}) * 1000) |
1097 | if $self->{debug}; | |
1098 | ||
1099 | if ($err) { | |
776097f1 | 1100 | $self->{mime}->{'proxmox/unreadable-archive'} = 1; |
425ef29c DM |
1101 | die $err; |
1102 | } | |
1103 | return 1; | |
1104 | } | |
1105 | ||
1106 | 1; | |
1107 | __END__ |