]>
Commit | Line | Data |
---|---|---|
91a5af02 | 1 | package PMG::Unpack; |
425ef29c DM |
2 | |
3 | use strict; | |
91a5af02 | 4 | use warnings; |
425ef29c DM |
5 | use IO::File; |
6 | use IO::Select; | |
7 | use Xdgmime; | |
91a5af02 DM |
8 | use Compress::Zlib qw(gzopen); |
9 | use Compress::Bzip2 qw(bzopen); | |
425ef29c | 10 | use File::Path; |
91a5af02 | 11 | use File::Temp qw(tempdir); |
425ef29c DM |
12 | use File::Basename; |
13 | use File::stat; | |
14 | use POSIX ":sys_wait_h"; | |
91a5af02 | 15 | use Time::HiRes qw(usleep ualarm gettimeofday tv_interval); |
425ef29c DM |
16 | use Archive::Zip qw(:CONSTANTS :ERROR_CODES); |
17 | use LibArchive; | |
425ef29c DM |
18 | use MIME::Parser; |
19 | ||
91a5af02 | 20 | use PMG::Utils; |
18598b2c | 21 | use PMG::MIMEUtils; |
91a5af02 | 22 | |
425ef29c DM |
23 | my $unpackers = { |
24 | ||
25 | # TAR | |
26 | 'application/x-tar' => [ 'tar', \&unpack_tar, 1], | |
27 | #'application/x-tar' => [ 'tar', \&generic_unpack ], | |
28 | #'application/x-tar' => [ '7z', \&generic_unpack ], | |
29 | 'application/x-compressed-tar' => [ 'tar', \&unpack_tar, 1], | |
30 | ||
31 | # CPIO | |
32 | 'application/x-cpio' => [ 'cpio', \&unpack_tar, 1], | |
33 | #'application/x-cpio' => [ '7z', \&generic_unpack ], | |
34 | ||
35 | # ZIP | |
776097f1 DM |
36 | #'application/zip' => [ 'zip', \&unpack_tar, 1], |
37 | 'application/zip' => [ '7z', \&generic_unpack ], | |
425ef29c DM |
38 | |
39 | # 7z | |
40 | 'application/x-7z-compressed' => [ '7z', \&generic_unpack ], | |
41 | ||
42 | # RAR | |
776097f1 | 43 | 'application/vnd.rar' => [ '7z', \&generic_unpack ], |
425ef29c DM |
44 | |
45 | # ARJ | |
46 | 'application/x-arj' => [ '7z', \&generic_unpack ], | |
47 | ||
48 | # RPM | |
49 | 'application/x-rpm' => [ '7z', \&generic_unpack ], | |
50 | ||
51 | # DEB | |
776097f1 | 52 | 'application/vnd.debian.binary-package' => [ 'ar', \&unpack_tar, 1], |
425ef29c DM |
53 | |
54 | # MS CAB | |
55 | 'application/vnd.ms-cab-compressed' => [ '7z', \&generic_unpack ], | |
56 | ||
57 | # LZH/LHA | |
58 | 'application/x-lha' => [ '7z', \&generic_unpack ], | |
59 | ||
60 | # TNEF (winmail.dat) | |
61 | 'application/vnd.ms-tnef' => [ 'tnef', \&generic_unpack ], | |
62 | ||
63 | # message/rfc822 | |
64 | 'message/rfc822' => [ 'mime', \&unpack_mime ], | |
65 | ||
66 | ## CHM, Nsis - supported by 7z, but we currently do not | |
67 | ||
68 | ##'application/x-zoo' - old format - no support | |
69 | ##'application/x-ms-dos-executable' - exe should be blocked anyways - no support | |
70 | ## application/x-arc - old format - no support | |
71 | }; | |
72 | ||
73 | my $decompressors = { | |
776097f1 | 74 | 'application/gzip' => [ 'guzip', \&uncompress_file ], |
425ef29c DM |
75 | 'application/x-compress' => [ 'uncompress', \&uncompress_file ], |
76 | # 'application/x-compressed-tar' => [ 'guzip', \&uncompress_file ], # unpack_tar is faster | |
77 | 'application/x-tarz' => [ 'uncompress', \&uncompress_file ], | |
78 | 'application/x-bzip' => [ 'bunzip2', \&uncompress_file ], | |
79 | 'application/x-bzip-compressed-tar' => [ 'bunzip2', \&uncompress_file ], | |
425ef29c DM |
80 | }; |
81 | ||
82 | ||
83 | ## some helper methods | |
84 | ||
85 | sub min2 { | |
86 | return ( $_[0] < $_[1]) ? $_[0] : $_[1]; | |
87 | } | |
88 | ||
89 | sub max2 { | |
90 | return ( $_[0] > $_[1]) ? $_[0] : $_[1]; | |
91 | } | |
92 | ||
93 | # STDERR is redirected to STDOUT by default | |
94 | sub helper_pipe_open { | |
95 | my ($fh, $inputfilename, $errorfilename, @cmd) = @_; | |
96 | ||
97 | my $pid = $fh->open ('-|'); | |
776097f1 | 98 | |
425ef29c DM |
99 | die "unable to fork helper process: $!" if !defined $pid; |
100 | ||
101 | return $pid if ($pid != 0); # parent process simply returns | |
776097f1 | 102 | |
425ef29c DM |
103 | $inputfilename = '/dev/null' if !$inputfilename; |
104 | ||
105 | # same algorythm as used inside SA | |
106 | ||
107 | my $fd = fileno (STDIN); | |
108 | close STDIN; | |
109 | POSIX::close(0) if $fd != 0; | |
110 | ||
111 | if (!open (STDIN, "<$inputfilename")) { | |
776097f1 DM |
112 | POSIX::_exit (1); |
113 | kill ('KILL', $$); | |
425ef29c DM |
114 | } |
115 | ||
116 | $errorfilename = '&STDOUT' if !$errorfilename; | |
117 | ||
118 | $fd = fileno(STDERR); | |
119 | close STDERR; | |
120 | POSIX::close(2) if $fd != 2; | |
121 | ||
122 | if (!open (STDERR, ">$errorfilename")) { | |
776097f1 DM |
123 | POSIX::_exit (1); |
124 | kill ('KILL', $$); | |
425ef29c DM |
125 | } |
126 | ||
127 | exec @cmd; | |
776097f1 | 128 | |
425ef29c DM |
129 | warn "exec failed"; |
130 | ||
776097f1 DM |
131 | POSIX::_exit (1); |
132 | kill('KILL', $$); | |
425ef29c DM |
133 | die; # else -w complains |
134 | } | |
135 | ||
136 | sub helper_pipe_consume { | |
137 | my ($cfh, $pid, $timeout, $bufsize, $callback) = @_; | |
138 | ||
139 | eval { | |
140 | run_with_timeout ($timeout, sub { | |
141 | if ($bufsize) { | |
142 | my $buf; | |
143 | my $count; | |
144 | ||
145 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
146 | &$callback ($buf, $count); | |
147 | } | |
148 | die "pipe read failed" if ($count < 0); | |
149 | ||
150 | } else { | |
151 | while (my $line = <$cfh>) { | |
152 | &$callback ($line); | |
153 | } | |
154 | } | |
155 | }); | |
156 | }; | |
157 | ||
158 | my $err = $@; | |
776097f1 | 159 | |
425ef29c DM |
160 | # send TERM first if process still exits |
161 | if ($err) { | |
162 | kill (15, $pid) if kill (0, $pid); | |
163 | ||
164 | # read remaining data, if any | |
165 | my ($count, $buf); | |
166 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
167 | # do nothing | |
168 | } | |
169 | } | |
776097f1 | 170 | |
425ef29c DM |
171 | # then close pipe |
172 | my $closeerr; | |
173 | close ($cfh) || ($closeerr = $!); | |
174 | my $childstat = $?; | |
175 | ||
176 | # still alive ? | |
177 | if (kill (0, $pid)) { | |
178 | sleep (1); | |
179 | kill (9, $pid); # terminate process | |
180 | die "child '$pid' termination problems\n"; | |
181 | } | |
182 | ||
183 | die $err if $err; | |
184 | ||
185 | die "child '$pid' close failed - $closeerr\n" if $closeerr; | |
186 | ||
187 | die "child '$pid' failed: $childstat\n" if $childstat; | |
188 | } | |
189 | ||
190 | sub run_with_timeout { | |
191 | my ($timeout, $code, @param) = @_; | |
192 | ||
193 | die "got timeout\n" if $timeout <= 0; | |
194 | ||
195 | my $prev_alarm; | |
196 | ||
197 | my $sigcount = 0; | |
198 | ||
199 | my $res; | |
200 | ||
201 | eval { | |
202 | local $SIG{ALRM} = sub { $sigcount++; die "got timeout\n"; }; | |
203 | local $SIG{PIPE} = sub { $sigcount++; die "broken pipe\n" }; | |
204 | local $SIG{__DIE__}; # see SA bug 4631 | |
205 | ||
206 | $prev_alarm = alarm ($timeout); | |
207 | ||
208 | $res = &$code (@param); | |
209 | ||
210 | alarm 0; # avoid race conditions | |
211 | }; | |
212 | ||
213 | my $err = $@; | |
776097f1 | 214 | |
425ef29c DM |
215 | alarm ($prev_alarm) if defined ($prev_alarm); |
216 | ||
217 | die "unknown error" if $sigcount && !$err; # seems to happen sometimes | |
218 | ||
219 | die $err if $err; | |
220 | ||
221 | return $res; | |
222 | } | |
223 | ||
224 | # the unpacker object constructor | |
225 | ||
226 | sub new { | |
227 | my ($type, %param) = @_; | |
228 | ||
229 | my $self = {}; | |
230 | bless $self, $type; | |
231 | ||
232 | $self->{tmpdir} = $param{tmpdir} || tempdir (CLEANUP => 1); | |
233 | $self->{starttime} = [gettimeofday]; | |
234 | $self->{timeout} = $param{timeout} || 3600*24; | |
235 | ||
236 | # maxfiles: 0 = disabled | |
237 | $self->{maxfiles} = defined ($param{maxfiles}) ? $param{maxfiles} : 1000; | |
238 | ||
239 | $param{maxrec} = 0 if !defined ($param{maxrec}); | |
240 | if ($param{maxrec} < 0) { | |
241 | $param{maxrec} = - $param{maxrec}; | |
242 | $self->{maxrec_soft} = 1; # do not die when limit reached | |
243 | } | |
244 | ||
245 | $self->{maxrec} = $param{maxrec} || 8; # 0 = disabled | |
246 | $self->{maxratio} = $param{maxratio} || 0; # 0 = disabled | |
247 | ||
776097f1 | 248 | $self->{maxquota} = $param{quota} || 250*1024*1024; # 250 MB |
425ef29c DM |
249 | |
250 | $self->{ctonly} = $param{ctonly}; # only detect contained content types | |
251 | ||
252 | # internal | |
776097f1 | 253 | $self->{quota} = 0; |
425ef29c DM |
254 | $self->{ratioquota} = 0; |
255 | $self->{size} = 0; | |
256 | $self->{files} = 0; | |
257 | $self->{levels} = 0; | |
258 | ||
259 | $self->{debug} = $param{debug} || 0; | |
260 | ||
261 | $self->{mime} = {}; | |
262 | ||
263 | $self->{ufid} = 0; # counter to creat unique file names | |
264 | $self->{udid} = 0; # counter to creat unique dir names | |
265 | $self->{ulid} = 0; # counter to creat unique link names | |
266 | ||
267 | $self->{todo} = []; | |
268 | ||
269 | return $self; | |
270 | } | |
271 | ||
272 | sub cleanup { | |
273 | my $self = shift; | |
274 | ||
275 | if ($self->{debug}) { | |
276 | system ("find '$self->{tmpdir}'"); | |
277 | } | |
278 | ||
279 | rmtree ($self->{tmpdir}); | |
280 | } | |
281 | ||
282 | sub DESTROY { | |
283 | my $self = shift; | |
284 | ||
285 | rmtree ($self->{tmpdir}); | |
286 | } | |
287 | ||
288 | ||
289 | sub uncompress_file { | |
290 | my ($self, $app, $filename, $newname, $csize, $filesize) = @_; | |
291 | ||
292 | my $timeout = $self->check_timeout(); | |
293 | ||
294 | my $maxsize = $self->{quota} - $self->{size}; | |
295 | ||
296 | if ($self->{maxratio}) { | |
297 | $maxsize = min2 ($maxsize, $filesize * $self->{maxratio}); | |
298 | } | |
299 | ||
300 | $self->add_glob_mime_type ($newname); | |
301 | ||
302 | my $outfd; | |
303 | ||
304 | my $usize = 0; | |
305 | my $err; | |
306 | my $ct; | |
307 | my $todo = 1; | |
308 | ||
309 | if ($app eq 'guzip' || $app eq 'bunzip2') { | |
310 | ||
311 | my $cfh; | |
776097f1 | 312 | |
425ef29c DM |
313 | eval { |
314 | ||
315 | # bzip provides a gz compatible interface | |
776097f1 | 316 | if ($app eq 'bunzip2') { |
425ef29c DM |
317 | $self->{mime}->{'application/x-bzip'} = 1; |
318 | $cfh = bzopen ("$filename", 'r'); | |
319 | die "bzopen '$filename' failed" if !$cfh; | |
320 | } else { | |
776097f1 | 321 | $self->{mime}->{'application/gzip'} = 1; |
425ef29c DM |
322 | $cfh = gzopen ("$filename", 'rb'); |
323 | die "gzopen '$filename' failed" if !$cfh; | |
324 | } | |
776097f1 | 325 | |
425ef29c DM |
326 | run_with_timeout ($timeout, sub { |
327 | my $count; | |
328 | my $buf; | |
329 | while (($count = $cfh->gzread ($buf, 128*1024)) > 0) { | |
330 | ||
331 | if (!$usize) { | |
332 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count); | |
333 | ||
334 | $usize += $count; | |
335 | $self->{mime}->{$ct} = 1; | |
336 | ||
337 | if (!is_archive ($ct)) { | |
338 | $todo = 0; | |
339 | ||
340 | # warning: this can lead to wrong size/quota test | |
341 | last if $self->{ctonly}; | |
342 | } | |
343 | } else { | |
344 | $usize += $count; | |
345 | } | |
346 | ||
347 | $self->check_comp_ratio ($filesize, $usize); | |
348 | ||
349 | $self->check_quota (1, $usize, $csize); | |
350 | ||
351 | if (!$outfd) { | |
352 | $outfd = IO::File->new; | |
353 | ||
354 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
355 | die "unable to create file $newname: $!"; | |
356 | } | |
357 | } | |
358 | ||
359 | if (!$outfd->print ($buf)) { | |
360 | die "unable to write '$newname' - $!"; | |
361 | } | |
362 | } | |
363 | if ($count < 0) { | |
364 | die "gzread failed"; | |
365 | } | |
366 | }); | |
367 | }; | |
368 | ||
369 | $err = $@; | |
370 | ||
371 | $cfh->gzclose(); | |
372 | ||
373 | } elsif ($app eq 'uncompress') { | |
374 | ||
375 | $self->{mime}->{'application/x-compress'} = 1; | |
376 | ||
377 | eval { | |
378 | my @cmd = ('/bin/gunzip', '-c', $filename); | |
379 | my $cfh = IO::File->new(); | |
380 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @cmd); | |
381 | ||
382 | helper_pipe_consume ($cfh, $pid, $timeout, 128*1024, sub { | |
383 | my ($buf, $count) = @_; | |
384 | ||
385 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count) if (!$usize); | |
386 | ||
387 | $usize += $count; | |
388 | ||
389 | $self->check_comp_ratio ($filesize, $usize); | |
390 | ||
391 | $self->check_quota (1, $usize, $csize); | |
392 | ||
393 | if (!$outfd) { | |
394 | $outfd = IO::File->new; | |
395 | ||
396 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
397 | die "unable to create file $newname: $!"; | |
398 | } | |
399 | } | |
400 | ||
401 | if (!$outfd->print ($buf)) { | |
402 | die "unable to write '$newname' - $!"; | |
403 | } | |
404 | ||
405 | }); | |
406 | }; | |
776097f1 | 407 | |
425ef29c DM |
408 | $err = $@; |
409 | } | |
410 | ||
411 | $outfd->close () if $outfd; | |
412 | ||
413 | if ($err) { | |
414 | unlink $newname; | |
415 | die $err; | |
416 | } | |
417 | ||
418 | $self->check_quota (1, $usize, $csize, 1); | |
419 | ||
420 | $self->todo_list_add ($newname, $ct, $usize); | |
421 | ||
422 | return $newname; | |
423 | }; | |
424 | ||
425 | # calculate real filesystem space (needed by ext3 to store files/dirs) | |
426 | sub realsize { | |
427 | my ($size, $isdir) = @_; | |
428 | ||
429 | my $bs = 4096; # ext3 block size | |
430 | ||
431 | $size = max2 ($size, $bs) if $isdir; # dirs needs at least one block | |
432 | ||
433 | return int (($size + $bs - 1) / $bs) * $bs; # round up to block size | |
434 | } | |
435 | ||
436 | sub todo_list_add { | |
437 | my ($self, $filename, $ct, $size) = @_; | |
438 | ||
439 | if ($ct) { | |
440 | $self->{mime}->{$ct} = 1; | |
441 | if (is_archive ($ct)) { | |
442 | push @{$self->{todo}}, [$filename, $ct, $size]; | |
443 | } | |
444 | } | |
445 | } | |
446 | ||
447 | sub check_timeout { | |
448 | my ($self) = @_; | |
776097f1 | 449 | |
425ef29c DM |
450 | my $elapsed = int (tv_interval ($self->{starttime})); |
451 | my $timeout = $self->{timeout} - $elapsed; | |
452 | ||
453 | die "got timeout\n" if $timeout <= 0; | |
454 | ||
455 | return $timeout; | |
456 | } | |
457 | ||
458 | sub check_comp_ratio { | |
459 | my ($self, $compsize, $usize) = @_; | |
460 | ||
461 | return if !$compsize || !$self->{maxratio}; | |
462 | ||
463 | my $ratio = $usize/$compsize; | |
464 | ||
465 | die "compresion ratio too large (> $self->{maxratio})" | |
466 | if $ratio > $self->{maxratio}; | |
467 | } | |
468 | ||
469 | sub check_quota { | |
470 | my ($self, $files, $size, $csize, $commit) = @_; | |
471 | ||
472 | my $sizediff = $csize ? $size - $csize : $size; | |
473 | ||
474 | die "compresion ratio too large (> $self->{maxratio})" | |
475 | if $self->{maxratio} && (($self->{size} + $sizediff) > $self->{ratioquota}); | |
476 | ||
776097f1 | 477 | die "archive too large (> $self->{quota})" |
425ef29c DM |
478 | if ($self->{size} + $sizediff) > $self->{quota}; |
479 | ||
480 | die "unexpected number of files '$files'" if $files <= 0; | |
481 | ||
482 | $files-- if ($csize); | |
483 | ||
484 | die "too many files in archive (> $self->{maxfiles})" | |
485 | if $self->{maxfiles} && (($self->{files} + $files) > $self->{maxfiles}); | |
486 | ||
487 | if ($commit) { | |
488 | $self->{files} += $files; | |
489 | $self->{size} += $sizediff; | |
490 | } | |
776097f1 | 491 | |
425ef29c DM |
492 | } |
493 | ||
494 | sub add_glob_mime_type { | |
495 | my ($self, $filename) = @_; | |
496 | ||
497 | if (my $ct = xdg_mime_get_mime_type_from_file_name (basename ($filename))) { | |
498 | $self->{mime}->{$ct} = 1 if $ct ne 'application/octet-stream'; | |
499 | } | |
500 | } | |
501 | ||
425ef29c DM |
502 | sub unpack_mime { |
503 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
504 | ||
505 | my $size = 0; | |
506 | my $files = 0; | |
507 | ||
508 | my $timeout = $self->check_timeout(); | |
509 | ||
510 | eval { | |
511 | run_with_timeout ($timeout, sub { | |
776097f1 | 512 | |
425ef29c | 513 | # Create a new MIME parser: |
18598b2c | 514 | my $max; |
425ef29c | 515 | if ($self->{maxfiles}) { |
18598b2c | 516 | $max = $self->{maxfiles} - $self->{files}; |
425ef29c DM |
517 | } |
518 | ||
18598b2c DC |
519 | my $parser = PMG::MIMEUtils::new_mime_parser({ |
520 | dumpdir => $tmpdir, | |
521 | nested => 1, | |
522 | ignore_errors => 1, | |
523 | extract_uuencode => 1, | |
524 | ignore_filename => 1, | |
525 | maxfiles => $max, | |
526 | }, 1); | |
527 | ||
425ef29c DM |
528 | my $entity = $parser->parse_open ($filename); |
529 | ||
18598b2c DC |
530 | PMG::MIMEUtils::traverse_mime_parts($entity, sub { |
531 | my ($part) = @_; | |
532 | my $ct = $part->head->mime_attr('content-type'); | |
533 | $self->{mime}->{$ct} = 1 if $ct && length($ct) < 256; | |
776097f1 | 534 | |
18598b2c DC |
535 | if (my $body = $part->bodyhandle) { |
536 | my $path = $body->path; | |
537 | $size += -s $path; | |
538 | $files++; | |
539 | } | |
540 | }); | |
425ef29c DM |
541 | }); |
542 | }; | |
543 | ||
544 | my $err = $@; | |
545 | ||
546 | die $err if $err; | |
547 | ||
548 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
549 | ||
550 | return 1; | |
551 | ||
552 | } | |
553 | ||
554 | sub unpack_zip { | |
555 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
556 | ||
557 | my $size = 0; | |
558 | my $files = 0; | |
559 | ||
560 | my $timeout = $self->check_timeout(); | |
561 | ||
562 | eval { | |
563 | ||
564 | my $zip = Archive::Zip->new (); | |
565 | ||
566 | Archive::Zip::setErrorHandler (sub { die @_ }); | |
567 | ||
568 | run_with_timeout ($timeout, sub { | |
569 | ||
570 | my $status = $zip->read ($filename); | |
571 | die "unable to open zip file '$filename'" if $status != AZ_OK; | |
776097f1 | 572 | |
425ef29c DM |
573 | my $tid = 1; |
574 | foreach my $mem ($zip->members) { | |
575 | ||
576 | $files++; | |
577 | ||
578 | my $cm = $mem->compressionMethod(); | |
579 | die "unsupported zip compression method '$cm'\n" | |
776097f1 | 580 | if !(($cm == COMPRESSION_DEFLATED || |
425ef29c DM |
581 | $cm == COMPRESSION_STORED)); |
582 | ||
583 | die "encrypted archive detected\n" | |
584 | if $mem->isEncrypted(); | |
585 | ||
586 | my $us = $mem->uncompressedSize(); | |
587 | ||
588 | next if $us <= 0; # skip zero size files | |
589 | ||
590 | if ($mem->isDirectory) { | |
591 | $size += realsize ($us, 1); | |
592 | } else { | |
593 | $size += realsize ($us); | |
594 | } | |
595 | ||
596 | $self->check_comp_ratio ($filesize, $size); | |
597 | ||
598 | $self->check_quota ($files, $size, $csize); | |
599 | ||
600 | next if $mem->isDirectory; # skip dirs | |
601 | ||
602 | my $name = basename ($mem->fileName()); | |
603 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
604 | my $newfn = sprintf "$tmpdir/Z%08d_$name", $tid++; | |
605 | ||
606 | $self->add_glob_mime_type ($name); | |
607 | ||
608 | my $outfd = IO::File->new; | |
609 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
776097f1 | 610 | die "unable to create file $newfn: $!"; |
425ef29c DM |
611 | } |
612 | ||
613 | my $ct; | |
614 | ||
615 | eval { | |
616 | ||
617 | $mem->desiredCompressionMethod (COMPRESSION_STORED); | |
618 | ||
619 | $status = $mem->rewindData(); | |
620 | ||
621 | die "unable to rewind zip stream" if $status != AZ_OK; | |
622 | ||
623 | my $outRef; | |
624 | my $bytes = 0; | |
625 | while ($status == AZ_OK) { | |
626 | ($outRef, $status) = $mem->readChunk(); | |
776097f1 | 627 | die "unable to read zip member" |
425ef29c DM |
628 | if ($status != AZ_OK && $status != AZ_STREAM_END); |
629 | ||
630 | my $len = length ($$outRef); | |
631 | if ($len > 0) { | |
632 | $ct = xdg_mime_get_mime_type_for_data ($$outRef, $len) if (!$bytes); | |
633 | $outfd->print ($$outRef) || die "write error during zip copy"; | |
634 | $bytes += $len; | |
635 | } | |
636 | ||
637 | last if $status == AZ_STREAM_END; | |
638 | } | |
639 | ||
776097f1 | 640 | $mem->endRead(); |
425ef29c DM |
641 | |
642 | $self->todo_list_add ($newfn, $ct, $bytes); | |
643 | ||
644 | }; | |
645 | ||
646 | my $err = $@; | |
647 | ||
648 | $outfd->close (); | |
649 | ||
650 | if ($err) { | |
651 | unlink $newfn; | |
652 | die $err; | |
653 | } | |
654 | } | |
655 | }); | |
656 | }; | |
657 | ||
658 | my $err = $@; | |
659 | ||
660 | die $err if $err; | |
661 | ||
662 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
663 | ||
664 | return 1; | |
665 | } | |
666 | ||
667 | sub unpack_tar { | |
668 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
669 | ||
670 | my $size = 0; | |
671 | my $files = 0; | |
672 | ||
673 | my $timeout = $self->check_timeout(); | |
674 | ||
675 | my $a = LibArchive::archive_read_new(); | |
676 | ||
677 | die "unable to create LibArchive object" if !$a; | |
678 | ||
679 | LibArchive::archive_read_support_format_all ($a); | |
680 | LibArchive::archive_read_support_compression_all ($a); | |
681 | ||
682 | eval { | |
683 | run_with_timeout ($timeout, sub { | |
684 | ||
685 | if ((my $r = LibArchive::archive_read_open_filename ($a, $filename, 10240))) { | |
776097f1 | 686 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); |
425ef29c DM |
687 | } |
688 | my $tid = 1; | |
689 | for (;;) { | |
690 | my $entry; | |
691 | my $r = LibArchive::archive_read_next_header ($a, $entry); | |
692 | ||
693 | last if ($r == LibArchive::ARCHIVE_EOF); | |
694 | ||
695 | if ($r != LibArchive::ARCHIVE_OK) { | |
696 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); | |
697 | } | |
698 | ||
699 | my $us = LibArchive::archive_entry_size ($entry); | |
700 | my $mode = LibArchive::archive_entry_mode ($entry); | |
701 | ||
702 | my $rs; | |
703 | if (POSIX::S_ISREG ($mode)) { | |
704 | $rs = realsize ($us); | |
705 | } else { | |
706 | $rs = POSIX::S_ISDIR ($mode) ? realsize ($us, 1) : 256; | |
707 | } | |
708 | $size += $rs; | |
709 | $files += 1; | |
710 | ||
711 | $self->check_comp_ratio ($filesize, $size); | |
712 | ||
713 | $self->check_quota ($files, $size, $csize); | |
714 | ||
715 | next if POSIX::S_ISDIR ($mode); | |
716 | next if !POSIX::S_ISREG ($mode); | |
717 | ||
718 | my $name = basename (LibArchive::archive_entry_pathname ($entry)); | |
719 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
720 | my $newfn = sprintf "$tmpdir/A%08d_$name", $tid++; | |
721 | ||
722 | $self->add_glob_mime_type ($name); | |
723 | ||
724 | my $outfd; | |
725 | ||
726 | eval { | |
727 | my $bytes = 0; | |
728 | my $ct; | |
729 | my $todo = 1; | |
730 | ||
731 | if ($us > 0) { | |
732 | my $len; | |
733 | my $buf; | |
734 | while (($len = LibArchive::archive_read_data($a, $buf, 128*1024)) > 0) { | |
776097f1 | 735 | |
425ef29c DM |
736 | if (!$bytes) { |
737 | if ($ct = xdg_mime_get_mime_type_for_data ($buf, $len)) { | |
738 | $self->{mime}->{$ct} = 1; | |
739 | ||
740 | if (!is_archive ($ct)) { | |
741 | $todo = 0; | |
742 | last if $self->{ctonly}; | |
743 | } | |
744 | } | |
745 | } | |
776097f1 | 746 | |
425ef29c DM |
747 | $bytes += $len; |
748 | ||
749 | if (!$outfd) { # create only when needed | |
750 | $outfd = IO::File->new; | |
776097f1 | 751 | |
425ef29c DM |
752 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { |
753 | die "unable to create file $newfn: $!"; | |
754 | } | |
755 | } | |
756 | ||
757 | if (!$outfd->print ($buf)) { | |
758 | die "unable to write '$newfn' - $!"; | |
759 | } | |
760 | } | |
761 | ||
762 | die ("error reading archive (encrypted)\n") | |
763 | if ($len < 0); | |
764 | } | |
765 | ||
766 | $self->todo_list_add ($newfn, $ct, $bytes) if $todo; | |
767 | }; | |
768 | ||
769 | my $err = $@; | |
770 | ||
771 | $outfd->close () if $outfd; | |
776097f1 | 772 | |
425ef29c DM |
773 | if ($err) { |
774 | unlink $newfn; | |
775 | die $err; | |
776 | } | |
776097f1 | 777 | } |
425ef29c DM |
778 | }); |
779 | }; | |
780 | ||
781 | my $err = $@; | |
782 | ||
783 | LibArchive::archive_read_close($a); | |
784 | LibArchive::archive_read_finish($a); | |
776097f1 | 785 | |
425ef29c DM |
786 | die $err if $err; |
787 | ||
788 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
789 | ||
790 | return 1; | |
791 | } | |
792 | ||
793 | sub generic_unpack { | |
794 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
795 | ||
796 | my $size = 0; | |
797 | my $files = 0; | |
798 | ||
799 | my $timeout = $self->check_timeout(); | |
800 | ||
801 | my @listcmd; | |
802 | my @restorecmd = ('/bin/false'); | |
803 | ||
776097f1 | 804 | my $filter; |
425ef29c DM |
805 | |
806 | if ($app eq 'tar') { | |
807 | @listcmd = ('/bin/tar', '-tvf', $filename); | |
808 | @restorecmd = ('/bin/tar', '-x', '--backup=number', "--transform='s,[^A-Za-z0-9\./],-,g'", '-o', | |
809 | '-m', '-C', $tmpdir, '-f', $filename); | |
810 | $filter = sub { | |
811 | my $line = shift; | |
812 | if ($line =~ m/^(\S)\S+\s+\S+\s+([\d,\.]+)\s+\S+/) { | |
813 | my ($type, $bytes) = ($1, $2); | |
814 | $bytes =~ s/[,\.]//g; | |
815 | ||
816 | if ($type eq 'd') { | |
817 | $bytes = realsize ($bytes, 1); | |
818 | } elsif ($type eq '-') { | |
819 | $bytes = realsize ($bytes); | |
820 | } else { | |
821 | $bytes = 256; # simple assumption | |
822 | } | |
823 | ||
824 | $size += $bytes; | |
825 | $files++; | |
826 | ||
827 | $self->check_comp_ratio ($filesize, $size); | |
828 | $self->check_quota ($files, $size, $csize); | |
829 | ||
830 | } else { | |
831 | die "can't parse tar output: $line\n"; | |
832 | } | |
833 | } | |
834 | } elsif ($app eq '7z' || $app eq '7zsimple') { | |
835 | # Note: set password to 'none' with '-pnone', to avoid reading from /dev/tty | |
836 | @restorecmd = ('/usr/bin/7z', 'e', '-pnone', '-bd', '-y', '-aou', "-w$self->{tmpdir}", "-o$tmpdir", $filename); | |
837 | ||
838 | @listcmd = ('/usr/bin/7z', 'l', '-slt', $filename); | |
839 | ||
840 | my ($path, $folder, $bytes); | |
841 | ||
842 | $filter = sub { | |
843 | my $line = shift; | |
844 | chomp $line; | |
845 | ||
846 | if ($line =~ m/^\s*\z/) { | |
847 | if (defined ($path) && defined ($bytes)) { | |
848 | $bytes = realsize ($bytes, $folder); | |
849 | $size += $bytes; | |
850 | $files++; | |
851 | ||
852 | $self->check_comp_ratio ($filesize, $size); | |
853 | $self->check_quota ($files, $size, $csize); | |
854 | } | |
855 | undef $path; | |
856 | undef $folder; | |
857 | undef $bytes; | |
776097f1 | 858 | |
425ef29c DM |
859 | } elsif ($line =~ m/^Path = (.*)\z/s) { |
860 | $path = $1; | |
861 | } elsif ($line =~ m/^Size = (\d+)\z/s) { | |
862 | $bytes = $1; | |
863 | } elsif ($line =~ m/^Folder = (\d+)\z/s) { | |
864 | $folder = $1; | |
865 | } elsif ($line =~ m/^Attributes = ([D\.][R\.][H\.][S\.][A\.])\z/s) { | |
866 | $folder = 1 if $1 && substr ($1, 0, 1) eq 'D'; | |
867 | } | |
868 | }; | |
869 | ||
870 | } elsif ($app eq 'tnef') { | |
871 | @listcmd = ('/usr/bin/tnef', '-tv', '-f', $filename); | |
872 | @restorecmd = ('/usr/bin/tnef', '-C', $tmpdir, '--number-backups', '-f', $filename); | |
873 | ||
874 | $filter = sub { | |
875 | my $line = shift; | |
876 | chomp $line; | |
877 | ||
878 | if ($line =~ m!^\s*(\d+)\s*|\s*\d{4}/\d{1,2}/\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2}\s*|!) { | |
879 | my $bytes = $1; | |
880 | ||
881 | $bytes = realsize ($bytes); | |
882 | $size += $bytes; | |
883 | $files++; | |
884 | ||
885 | $self->check_comp_ratio ($filesize, $size); | |
886 | $self->check_quota ($files, $size, $csize); | |
887 | } else { | |
888 | die "can't parse tnef output\n"; | |
889 | } | |
890 | ||
891 | }; | |
892 | ||
893 | } else { | |
894 | die "unknown application '$app'"; | |
895 | } | |
896 | ||
897 | eval { | |
898 | ||
899 | my $cfh = IO::File->new(); | |
900 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @listcmd); | |
776097f1 | 901 | |
425ef29c DM |
902 | helper_pipe_consume ($cfh, $pid, $timeout, 0, $filter); |
903 | }; | |
904 | ||
905 | my $err = $@; | |
906 | ||
907 | die $err if $err; | |
908 | ||
909 | return if !$files; # empty archive | |
910 | ||
911 | $self->check_quota ($files, $size, $csize, 1); | |
912 | ||
913 | $timeout = $self->check_timeout(); | |
914 | ||
915 | my $cfh = IO::File->new(); | |
916 | my $pid = helper_pipe_open ($cfh, '/dev/null', undef, @restorecmd); | |
917 | helper_pipe_consume ($cfh, $pid, $timeout, 0, sub { | |
918 | my $line = shift; | |
919 | print "$app: $line" if $self->{debug}; | |
920 | }); | |
921 | ||
922 | return 1; | |
923 | } | |
924 | ||
925 | sub unpack_dir { | |
926 | my ($self, $dirname, $level) = @_; | |
927 | ||
928 | local (*DIR); | |
929 | ||
930 | print "unpack dir '$dirname'\n" if $self->{debug}; | |
931 | ||
932 | opendir(DIR, $dirname) || die "can't opendir $dirname: $!"; | |
933 | ||
934 | my $name; | |
935 | ||
776097f1 | 936 | while (defined ($name = readdir (DIR))) { |
425ef29c DM |
937 | my $path = "$dirname/$name"; |
938 | my $st = lstat ($path); | |
939 | ||
940 | if (!$st) { | |
941 | die "no such file '$path' - $!"; | |
942 | } elsif (POSIX::S_ISDIR ($st->mode)) { | |
943 | next if ($name eq '.' || $name eq '..'); | |
944 | $self->unpack_dir ($path, $level); | |
945 | } elsif (POSIX::S_ISREG ($st->mode)) { | |
946 | my $size = $st->size; | |
947 | $self->__unpack_archive ($path, $level + 1, $size); | |
948 | } | |
949 | } | |
950 | ||
951 | closedir DIR; | |
952 | } | |
953 | ||
954 | sub unpack_todo { | |
955 | my ($self, $level) = @_; | |
956 | ||
957 | my $ta = $self->{todo}; | |
958 | $self->{todo} = []; | |
959 | ||
960 | foreach my $todo (@$ta) { | |
961 | $self->__unpack_archive ($todo->[0], $level, $todo->[2], $todo->[1]); | |
962 | } | |
963 | } | |
964 | ||
965 | sub __unpack_archive { | |
966 | my ($self, $filename, $level, $size, $ct) = @_; | |
967 | ||
968 | $level = 0 if !$level; | |
969 | ||
776097f1 | 970 | $self->{levels} = max2($self->{levels}, $level); |
425ef29c DM |
971 | |
972 | if ($self->{maxrec} && ($level >= $self->{maxrec})) { | |
776097f1 | 973 | return if $self->{maxrec_soft}; |
425ef29c DM |
974 | die "max recursion limit reached\n"; |
975 | } | |
976 | ||
977 | die "undefined file size" if !defined ($size); | |
978 | ||
979 | return if !$size; # nothing to do | |
980 | ||
981 | if (!$ct) { | |
91a5af02 DM |
982 | $ct = PMG::Utils::magic_mime_type_for_file($filename); |
983 | $self->add_glob_mime_type($filename); | |
425ef29c DM |
984 | } |
985 | ||
986 | if ($ct) { | |
987 | $self->{mime}->{$ct} = 1; | |
988 | ||
776097f1 | 989 | if (defined($decompressors->{$ct})) { |
425ef29c DM |
990 | |
991 | my ($app, $code) = @{$decompressors->{$ct}}; | |
992 | ||
993 | if ($app) { | |
994 | ||
995 | # we try to keep extension correctly | |
776097f1 DM |
996 | my $tmp = basename($filename); |
997 | ($ct eq 'application/gzip') && | |
425ef29c | 998 | $tmp =~ s/\.gz\z//; |
776097f1 | 999 | ($ct eq 'application/x-bzip') && |
425ef29c | 1000 | $tmp =~ s/\.bz2?\z//; |
776097f1 | 1001 | ($ct eq 'application/x-compress') && |
425ef29c | 1002 | $tmp =~ s/\.Z\z//; |
776097f1 | 1003 | ($ct eq 'application/x-compressed-tar') && |
425ef29c | 1004 | $tmp =~ s/\.gz\z// || $tmp =~ s/\.tgz\z/.tar/; |
776097f1 | 1005 | ($ct eq 'application/x-bzip-compressed-tar') && |
425ef29c DM |
1006 | $tmp =~ s/\.bz2?\z// || $tmp =~ s/\.tbz\z/.tar/; |
1007 | ($ct eq 'application/x-tarz') && | |
1008 | $tmp =~ s/\.Z\z//; | |
1009 | ||
776097f1 | 1010 | my $newname = sprintf "%s/DC_%08d_%s", $self->{tmpdir}, ++$self->{ufid}, $tmp; |
425ef29c DM |
1011 | |
1012 | print "Decomp: $filename\n\t($ct) with $app to $newname\n" | |
776097f1 | 1013 | if $self->{debug}; |
425ef29c | 1014 | |
776097f1 | 1015 | if (my $res = &$code($self, $app, $filename, $newname, $level ? $size : 0, $size)) { |
425ef29c DM |
1016 | unlink $filename if $level; |
1017 | $self->unpack_todo ($level + 1); | |
1018 | } | |
1019 | } | |
1020 | } elsif (defined ($unpackers->{$ct})) { | |
1021 | ||
1022 | my ($app, $code, $ctdetect) = @{$unpackers->{$ct}}; | |
1023 | ||
1024 | if ($app) { | |
1025 | ||
776097f1 | 1026 | my $tmpdir = sprintf "%s/DIR_%08d", $self->{tmpdir}, ++$self->{udid}; |
425ef29c | 1027 | mkdir $tmpdir; |
776097f1 | 1028 | |
425ef29c DM |
1029 | print "Unpack: $filename\n\t($ct) with $app to $tmpdir\n" |
1030 | if $self->{debug}; | |
1031 | ||
1032 | if (my $res = &$code ($self, $app, $filename, $tmpdir, $level ? $size : 0, $size)) { | |
1033 | unlink $filename if $level; | |
1034 | ||
1035 | if ($ctdetect) { | |
1036 | $self->unpack_todo ($level + 1); | |
1037 | } else { | |
1038 | $self->unpack_dir ($tmpdir, $level); | |
1039 | } | |
1040 | } | |
1041 | } | |
1042 | } | |
1043 | } | |
1044 | } | |
1045 | ||
1046 | sub is_archive { | |
776097f1 | 1047 | my ($ct) = @_; |
425ef29c | 1048 | |
776097f1 | 1049 | return defined($decompressors->{$ct}) || defined($unpackers->{$ct}); |
425ef29c DM |
1050 | } |
1051 | ||
1052 | # unpack_archive | |
1053 | # | |
1054 | # Description: unpacks an archive and records containing | |
1055 | # content types (detected by magic numbers and file extension) | |
776097f1 DM |
1056 | # Extracted files are stored inside 'tempdir'. |
1057 | # | |
425ef29c DM |
1058 | # returns: true if file is archive, undef otherwhise |
1059 | ||
1060 | sub unpack_archive { | |
1061 | my ($self, $filename, $ct) = @_; | |
1062 | ||
776097f1 | 1063 | my $st = lstat($filename); |
425ef29c DM |
1064 | my $size = 0; |
1065 | ||
1066 | if (!$st) { | |
1067 | die "no such file '$filename' - $!"; | |
776097f1 | 1068 | } elsif (POSIX::S_ISREG($st->mode)) { |
425ef29c DM |
1069 | $size = $st->size; |
1070 | ||
1071 | return if !$size; # do nothing | |
1072 | ||
1073 | $self->{quota} = $self->{maxquota} - $self->{size}; | |
1074 | ||
1075 | $self->{ratioquota} = $size * $self->{maxratio} if $self->{maxratio}; | |
776097f1 | 1076 | |
425ef29c DM |
1077 | } else { |
1078 | return; # do nothing | |
1079 | } | |
776097f1 | 1080 | |
91a5af02 | 1081 | $ct = PMG::Utils::magic_mime_type_for_file($filename) if !$ct; |
425ef29c | 1082 | |
776097f1 | 1083 | return if (!$ct || !is_archive($ct)); # not an archive |
425ef29c DM |
1084 | |
1085 | eval { | |
776097f1 | 1086 | $self->__unpack_archive($filename, 0, $st->size, $ct); |
425ef29c DM |
1087 | }; |
1088 | ||
1089 | my $err = $@; | |
1090 | ||
776097f1 | 1091 | printf "ELAPSED: %.2f ms $filename\n", |
425ef29c DM |
1092 | int(tv_interval ($self->{starttime}) * 1000) |
1093 | if $self->{debug}; | |
1094 | ||
1095 | if ($err) { | |
776097f1 | 1096 | $self->{mime}->{'proxmox/unreadable-archive'} = 1; |
425ef29c DM |
1097 | die $err; |
1098 | } | |
1099 | return 1; | |
1100 | } | |
1101 | ||
1102 | 1; | |
1103 | __END__ |