]>
Commit | Line | Data |
---|---|---|
91a5af02 | 1 | package PMG::Unpack; |
425ef29c DM |
2 | |
3 | use strict; | |
91a5af02 | 4 | use warnings; |
425ef29c DM |
5 | use IO::File; |
6 | use IO::Select; | |
7 | use Xdgmime; | |
91a5af02 DM |
8 | use Compress::Zlib qw(gzopen); |
9 | use Compress::Bzip2 qw(bzopen); | |
425ef29c | 10 | use File::Path; |
91a5af02 | 11 | use File::Temp qw(tempdir); |
425ef29c DM |
12 | use File::Basename; |
13 | use File::stat; | |
14 | use POSIX ":sys_wait_h"; | |
91a5af02 | 15 | use Time::HiRes qw(usleep ualarm gettimeofday tv_interval); |
425ef29c DM |
16 | use Archive::Zip qw(:CONSTANTS :ERROR_CODES); |
17 | use LibArchive; | |
425ef29c DM |
18 | use MIME::Parser; |
19 | ||
91a5af02 DM |
20 | use PMG::Utils; |
21 | ||
425ef29c DM |
22 | my $unpackers = { |
23 | ||
24 | # TAR | |
25 | 'application/x-tar' => [ 'tar', \&unpack_tar, 1], | |
26 | #'application/x-tar' => [ 'tar', \&generic_unpack ], | |
27 | #'application/x-tar' => [ '7z', \&generic_unpack ], | |
28 | 'application/x-compressed-tar' => [ 'tar', \&unpack_tar, 1], | |
29 | ||
30 | # CPIO | |
31 | 'application/x-cpio' => [ 'cpio', \&unpack_tar, 1], | |
32 | #'application/x-cpio' => [ '7z', \&generic_unpack ], | |
33 | ||
34 | # ZIP | |
776097f1 DM |
35 | #'application/zip' => [ 'zip', \&unpack_tar, 1], |
36 | 'application/zip' => [ '7z', \&generic_unpack ], | |
425ef29c DM |
37 | |
38 | # 7z | |
39 | 'application/x-7z-compressed' => [ '7z', \&generic_unpack ], | |
40 | ||
41 | # RAR | |
776097f1 | 42 | 'application/vnd.rar' => [ '7z', \&generic_unpack ], |
425ef29c DM |
43 | |
44 | # ARJ | |
45 | 'application/x-arj' => [ '7z', \&generic_unpack ], | |
46 | ||
47 | # RPM | |
48 | 'application/x-rpm' => [ '7z', \&generic_unpack ], | |
49 | ||
50 | # DEB | |
776097f1 | 51 | 'application/vnd.debian.binary-package' => [ 'ar', \&unpack_tar, 1], |
425ef29c DM |
52 | |
53 | # MS CAB | |
54 | 'application/vnd.ms-cab-compressed' => [ '7z', \&generic_unpack ], | |
55 | ||
56 | # LZH/LHA | |
57 | 'application/x-lha' => [ '7z', \&generic_unpack ], | |
58 | ||
59 | # TNEF (winmail.dat) | |
60 | 'application/vnd.ms-tnef' => [ 'tnef', \&generic_unpack ], | |
61 | ||
62 | # message/rfc822 | |
63 | 'message/rfc822' => [ 'mime', \&unpack_mime ], | |
64 | ||
65 | ## CHM, Nsis - supported by 7z, but we currently do not | |
66 | ||
67 | ##'application/x-zoo' - old format - no support | |
68 | ##'application/x-ms-dos-executable' - exe should be blocked anyways - no support | |
69 | ## application/x-arc - old format - no support | |
70 | }; | |
71 | ||
72 | my $decompressors = { | |
776097f1 | 73 | 'application/gzip' => [ 'guzip', \&uncompress_file ], |
425ef29c DM |
74 | 'application/x-compress' => [ 'uncompress', \&uncompress_file ], |
75 | # 'application/x-compressed-tar' => [ 'guzip', \&uncompress_file ], # unpack_tar is faster | |
76 | 'application/x-tarz' => [ 'uncompress', \&uncompress_file ], | |
77 | 'application/x-bzip' => [ 'bunzip2', \&uncompress_file ], | |
78 | 'application/x-bzip-compressed-tar' => [ 'bunzip2', \&uncompress_file ], | |
425ef29c DM |
79 | }; |
80 | ||
81 | ||
82 | ## some helper methods | |
83 | ||
84 | sub min2 { | |
85 | return ( $_[0] < $_[1]) ? $_[0] : $_[1]; | |
86 | } | |
87 | ||
88 | sub max2 { | |
89 | return ( $_[0] > $_[1]) ? $_[0] : $_[1]; | |
90 | } | |
91 | ||
92 | # STDERR is redirected to STDOUT by default | |
93 | sub helper_pipe_open { | |
94 | my ($fh, $inputfilename, $errorfilename, @cmd) = @_; | |
95 | ||
96 | my $pid = $fh->open ('-|'); | |
776097f1 | 97 | |
425ef29c DM |
98 | die "unable to fork helper process: $!" if !defined $pid; |
99 | ||
100 | return $pid if ($pid != 0); # parent process simply returns | |
776097f1 | 101 | |
425ef29c DM |
102 | $inputfilename = '/dev/null' if !$inputfilename; |
103 | ||
104 | # same algorythm as used inside SA | |
105 | ||
106 | my $fd = fileno (STDIN); | |
107 | close STDIN; | |
108 | POSIX::close(0) if $fd != 0; | |
109 | ||
110 | if (!open (STDIN, "<$inputfilename")) { | |
776097f1 DM |
111 | POSIX::_exit (1); |
112 | kill ('KILL', $$); | |
425ef29c DM |
113 | } |
114 | ||
115 | $errorfilename = '&STDOUT' if !$errorfilename; | |
116 | ||
117 | $fd = fileno(STDERR); | |
118 | close STDERR; | |
119 | POSIX::close(2) if $fd != 2; | |
120 | ||
121 | if (!open (STDERR, ">$errorfilename")) { | |
776097f1 DM |
122 | POSIX::_exit (1); |
123 | kill ('KILL', $$); | |
425ef29c DM |
124 | } |
125 | ||
126 | exec @cmd; | |
776097f1 | 127 | |
425ef29c DM |
128 | warn "exec failed"; |
129 | ||
776097f1 DM |
130 | POSIX::_exit (1); |
131 | kill('KILL', $$); | |
425ef29c DM |
132 | die; # else -w complains |
133 | } | |
134 | ||
135 | sub helper_pipe_consume { | |
136 | my ($cfh, $pid, $timeout, $bufsize, $callback) = @_; | |
137 | ||
138 | eval { | |
139 | run_with_timeout ($timeout, sub { | |
140 | if ($bufsize) { | |
141 | my $buf; | |
142 | my $count; | |
143 | ||
144 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
145 | &$callback ($buf, $count); | |
146 | } | |
147 | die "pipe read failed" if ($count < 0); | |
148 | ||
149 | } else { | |
150 | while (my $line = <$cfh>) { | |
151 | &$callback ($line); | |
152 | } | |
153 | } | |
154 | }); | |
155 | }; | |
156 | ||
157 | my $err = $@; | |
776097f1 | 158 | |
425ef29c DM |
159 | # send TERM first if process still exits |
160 | if ($err) { | |
161 | kill (15, $pid) if kill (0, $pid); | |
162 | ||
163 | # read remaining data, if any | |
164 | my ($count, $buf); | |
165 | while (($count = $cfh->sysread ($buf, $bufsize)) > 0) { | |
166 | # do nothing | |
167 | } | |
168 | } | |
776097f1 | 169 | |
425ef29c DM |
170 | # then close pipe |
171 | my $closeerr; | |
172 | close ($cfh) || ($closeerr = $!); | |
173 | my $childstat = $?; | |
174 | ||
175 | # still alive ? | |
176 | if (kill (0, $pid)) { | |
177 | sleep (1); | |
178 | kill (9, $pid); # terminate process | |
179 | die "child '$pid' termination problems\n"; | |
180 | } | |
181 | ||
182 | die $err if $err; | |
183 | ||
184 | die "child '$pid' close failed - $closeerr\n" if $closeerr; | |
185 | ||
186 | die "child '$pid' failed: $childstat\n" if $childstat; | |
187 | } | |
188 | ||
189 | sub run_with_timeout { | |
190 | my ($timeout, $code, @param) = @_; | |
191 | ||
192 | die "got timeout\n" if $timeout <= 0; | |
193 | ||
194 | my $prev_alarm; | |
195 | ||
196 | my $sigcount = 0; | |
197 | ||
198 | my $res; | |
199 | ||
200 | eval { | |
201 | local $SIG{ALRM} = sub { $sigcount++; die "got timeout\n"; }; | |
202 | local $SIG{PIPE} = sub { $sigcount++; die "broken pipe\n" }; | |
203 | local $SIG{__DIE__}; # see SA bug 4631 | |
204 | ||
205 | $prev_alarm = alarm ($timeout); | |
206 | ||
207 | $res = &$code (@param); | |
208 | ||
209 | alarm 0; # avoid race conditions | |
210 | }; | |
211 | ||
212 | my $err = $@; | |
776097f1 | 213 | |
425ef29c DM |
214 | alarm ($prev_alarm) if defined ($prev_alarm); |
215 | ||
216 | die "unknown error" if $sigcount && !$err; # seems to happen sometimes | |
217 | ||
218 | die $err if $err; | |
219 | ||
220 | return $res; | |
221 | } | |
222 | ||
223 | # the unpacker object constructor | |
224 | ||
225 | sub new { | |
226 | my ($type, %param) = @_; | |
227 | ||
228 | my $self = {}; | |
229 | bless $self, $type; | |
230 | ||
231 | $self->{tmpdir} = $param{tmpdir} || tempdir (CLEANUP => 1); | |
232 | $self->{starttime} = [gettimeofday]; | |
233 | $self->{timeout} = $param{timeout} || 3600*24; | |
234 | ||
235 | # maxfiles: 0 = disabled | |
236 | $self->{maxfiles} = defined ($param{maxfiles}) ? $param{maxfiles} : 1000; | |
237 | ||
238 | $param{maxrec} = 0 if !defined ($param{maxrec}); | |
239 | if ($param{maxrec} < 0) { | |
240 | $param{maxrec} = - $param{maxrec}; | |
241 | $self->{maxrec_soft} = 1; # do not die when limit reached | |
242 | } | |
243 | ||
244 | $self->{maxrec} = $param{maxrec} || 8; # 0 = disabled | |
245 | $self->{maxratio} = $param{maxratio} || 0; # 0 = disabled | |
246 | ||
776097f1 | 247 | $self->{maxquota} = $param{quota} || 250*1024*1024; # 250 MB |
425ef29c DM |
248 | |
249 | $self->{ctonly} = $param{ctonly}; # only detect contained content types | |
250 | ||
251 | # internal | |
776097f1 | 252 | $self->{quota} = 0; |
425ef29c DM |
253 | $self->{ratioquota} = 0; |
254 | $self->{size} = 0; | |
255 | $self->{files} = 0; | |
256 | $self->{levels} = 0; | |
257 | ||
258 | $self->{debug} = $param{debug} || 0; | |
259 | ||
260 | $self->{mime} = {}; | |
261 | ||
262 | $self->{ufid} = 0; # counter to creat unique file names | |
263 | $self->{udid} = 0; # counter to creat unique dir names | |
264 | $self->{ulid} = 0; # counter to creat unique link names | |
265 | ||
266 | $self->{todo} = []; | |
267 | ||
268 | return $self; | |
269 | } | |
270 | ||
271 | sub cleanup { | |
272 | my $self = shift; | |
273 | ||
274 | if ($self->{debug}) { | |
275 | system ("find '$self->{tmpdir}'"); | |
276 | } | |
277 | ||
278 | rmtree ($self->{tmpdir}); | |
279 | } | |
280 | ||
281 | sub DESTROY { | |
282 | my $self = shift; | |
283 | ||
284 | rmtree ($self->{tmpdir}); | |
285 | } | |
286 | ||
287 | ||
288 | sub uncompress_file { | |
289 | my ($self, $app, $filename, $newname, $csize, $filesize) = @_; | |
290 | ||
291 | my $timeout = $self->check_timeout(); | |
292 | ||
293 | my $maxsize = $self->{quota} - $self->{size}; | |
294 | ||
295 | if ($self->{maxratio}) { | |
296 | $maxsize = min2 ($maxsize, $filesize * $self->{maxratio}); | |
297 | } | |
298 | ||
299 | $self->add_glob_mime_type ($newname); | |
300 | ||
301 | my $outfd; | |
302 | ||
303 | my $usize = 0; | |
304 | my $err; | |
305 | my $ct; | |
306 | my $todo = 1; | |
307 | ||
308 | if ($app eq 'guzip' || $app eq 'bunzip2') { | |
309 | ||
310 | my $cfh; | |
776097f1 | 311 | |
425ef29c DM |
312 | eval { |
313 | ||
314 | # bzip provides a gz compatible interface | |
776097f1 | 315 | if ($app eq 'bunzip2') { |
425ef29c DM |
316 | $self->{mime}->{'application/x-bzip'} = 1; |
317 | $cfh = bzopen ("$filename", 'r'); | |
318 | die "bzopen '$filename' failed" if !$cfh; | |
319 | } else { | |
776097f1 | 320 | $self->{mime}->{'application/gzip'} = 1; |
425ef29c DM |
321 | $cfh = gzopen ("$filename", 'rb'); |
322 | die "gzopen '$filename' failed" if !$cfh; | |
323 | } | |
776097f1 | 324 | |
425ef29c DM |
325 | run_with_timeout ($timeout, sub { |
326 | my $count; | |
327 | my $buf; | |
328 | while (($count = $cfh->gzread ($buf, 128*1024)) > 0) { | |
329 | ||
330 | if (!$usize) { | |
331 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count); | |
332 | ||
333 | $usize += $count; | |
334 | $self->{mime}->{$ct} = 1; | |
335 | ||
336 | if (!is_archive ($ct)) { | |
337 | $todo = 0; | |
338 | ||
339 | # warning: this can lead to wrong size/quota test | |
340 | last if $self->{ctonly}; | |
341 | } | |
342 | } else { | |
343 | $usize += $count; | |
344 | } | |
345 | ||
346 | $self->check_comp_ratio ($filesize, $usize); | |
347 | ||
348 | $self->check_quota (1, $usize, $csize); | |
349 | ||
350 | if (!$outfd) { | |
351 | $outfd = IO::File->new; | |
352 | ||
353 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
354 | die "unable to create file $newname: $!"; | |
355 | } | |
356 | } | |
357 | ||
358 | if (!$outfd->print ($buf)) { | |
359 | die "unable to write '$newname' - $!"; | |
360 | } | |
361 | } | |
362 | if ($count < 0) { | |
363 | die "gzread failed"; | |
364 | } | |
365 | }); | |
366 | }; | |
367 | ||
368 | $err = $@; | |
369 | ||
370 | $cfh->gzclose(); | |
371 | ||
372 | } elsif ($app eq 'uncompress') { | |
373 | ||
374 | $self->{mime}->{'application/x-compress'} = 1; | |
375 | ||
376 | eval { | |
377 | my @cmd = ('/bin/gunzip', '-c', $filename); | |
378 | my $cfh = IO::File->new(); | |
379 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @cmd); | |
380 | ||
381 | helper_pipe_consume ($cfh, $pid, $timeout, 128*1024, sub { | |
382 | my ($buf, $count) = @_; | |
383 | ||
384 | $ct = xdg_mime_get_mime_type_for_data ($buf, $count) if (!$usize); | |
385 | ||
386 | $usize += $count; | |
387 | ||
388 | $self->check_comp_ratio ($filesize, $usize); | |
389 | ||
390 | $self->check_quota (1, $usize, $csize); | |
391 | ||
392 | if (!$outfd) { | |
393 | $outfd = IO::File->new; | |
394 | ||
395 | if (!$outfd->open ($newname, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
396 | die "unable to create file $newname: $!"; | |
397 | } | |
398 | } | |
399 | ||
400 | if (!$outfd->print ($buf)) { | |
401 | die "unable to write '$newname' - $!"; | |
402 | } | |
403 | ||
404 | }); | |
405 | }; | |
776097f1 | 406 | |
425ef29c DM |
407 | $err = $@; |
408 | } | |
409 | ||
410 | $outfd->close () if $outfd; | |
411 | ||
412 | if ($err) { | |
413 | unlink $newname; | |
414 | die $err; | |
415 | } | |
416 | ||
417 | $self->check_quota (1, $usize, $csize, 1); | |
418 | ||
419 | $self->todo_list_add ($newname, $ct, $usize); | |
420 | ||
421 | return $newname; | |
422 | }; | |
423 | ||
424 | # calculate real filesystem space (needed by ext3 to store files/dirs) | |
425 | sub realsize { | |
426 | my ($size, $isdir) = @_; | |
427 | ||
428 | my $bs = 4096; # ext3 block size | |
429 | ||
430 | $size = max2 ($size, $bs) if $isdir; # dirs needs at least one block | |
431 | ||
432 | return int (($size + $bs - 1) / $bs) * $bs; # round up to block size | |
433 | } | |
434 | ||
435 | sub todo_list_add { | |
436 | my ($self, $filename, $ct, $size) = @_; | |
437 | ||
438 | if ($ct) { | |
439 | $self->{mime}->{$ct} = 1; | |
440 | if (is_archive ($ct)) { | |
441 | push @{$self->{todo}}, [$filename, $ct, $size]; | |
442 | } | |
443 | } | |
444 | } | |
445 | ||
446 | sub check_timeout { | |
447 | my ($self) = @_; | |
776097f1 | 448 | |
425ef29c DM |
449 | my $elapsed = int (tv_interval ($self->{starttime})); |
450 | my $timeout = $self->{timeout} - $elapsed; | |
451 | ||
452 | die "got timeout\n" if $timeout <= 0; | |
453 | ||
454 | return $timeout; | |
455 | } | |
456 | ||
457 | sub check_comp_ratio { | |
458 | my ($self, $compsize, $usize) = @_; | |
459 | ||
460 | return if !$compsize || !$self->{maxratio}; | |
461 | ||
462 | my $ratio = $usize/$compsize; | |
463 | ||
464 | die "compresion ratio too large (> $self->{maxratio})" | |
465 | if $ratio > $self->{maxratio}; | |
466 | } | |
467 | ||
468 | sub check_quota { | |
469 | my ($self, $files, $size, $csize, $commit) = @_; | |
470 | ||
471 | my $sizediff = $csize ? $size - $csize : $size; | |
472 | ||
473 | die "compresion ratio too large (> $self->{maxratio})" | |
474 | if $self->{maxratio} && (($self->{size} + $sizediff) > $self->{ratioquota}); | |
475 | ||
776097f1 | 476 | die "archive too large (> $self->{quota})" |
425ef29c DM |
477 | if ($self->{size} + $sizediff) > $self->{quota}; |
478 | ||
479 | die "unexpected number of files '$files'" if $files <= 0; | |
480 | ||
481 | $files-- if ($csize); | |
482 | ||
483 | die "too many files in archive (> $self->{maxfiles})" | |
484 | if $self->{maxfiles} && (($self->{files} + $files) > $self->{maxfiles}); | |
485 | ||
486 | if ($commit) { | |
487 | $self->{files} += $files; | |
488 | $self->{size} += $sizediff; | |
489 | } | |
776097f1 | 490 | |
425ef29c DM |
491 | } |
492 | ||
493 | sub add_glob_mime_type { | |
494 | my ($self, $filename) = @_; | |
495 | ||
496 | if (my $ct = xdg_mime_get_mime_type_from_file_name (basename ($filename))) { | |
497 | $self->{mime}->{$ct} = 1 if $ct ne 'application/octet-stream'; | |
498 | } | |
499 | } | |
500 | ||
501 | sub walk_mime_entity { | |
502 | my ($self, $entity) = @_; | |
503 | ||
504 | my $count = 0; | |
505 | my $size = 0; | |
506 | ||
507 | my $ct = $entity->head->mime_attr ('content-type'); | |
508 | $self->{mime}->{$ct} = 1 if $ct && length ($ct) < 256; | |
509 | ||
510 | if (my $body = $entity->bodyhandle) { | |
511 | my $path = $body->path; | |
512 | $size = -s $path; | |
513 | $count = 1; | |
514 | } | |
515 | ||
516 | foreach my $part ($entity->parts) { | |
517 | if (my ($n, $s) = $self->walk_mime_entity ($part)) { | |
518 | $count += $n; | |
519 | $size += $s; | |
520 | } | |
521 | } | |
522 | ||
523 | return ($count, $size); | |
524 | } | |
525 | ||
526 | ||
527 | sub unpack_mime { | |
528 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
529 | ||
530 | my $size = 0; | |
531 | my $files = 0; | |
532 | ||
533 | my $timeout = $self->check_timeout(); | |
534 | ||
535 | eval { | |
536 | run_with_timeout ($timeout, sub { | |
776097f1 | 537 | |
425ef29c DM |
538 | # Create a new MIME parser: |
539 | my $parser = new MIME::Parser; | |
540 | $parser->output_under ($tmpdir); | |
776097f1 | 541 | $parser->extract_nested_messages (1); |
425ef29c DM |
542 | $parser->ignore_errors (1); |
543 | $parser->extract_uuencode (1); | |
544 | $parser->filer->ignore_filename(1); | |
545 | ||
546 | if ($self->{maxfiles}) { | |
547 | my $max = $self->{maxfiles} - $self->{files}; | |
548 | $parser->max_parts ($max); | |
549 | } | |
550 | ||
551 | my $entity = $parser->parse_open ($filename); | |
552 | ||
553 | ($files, $size) = $self->walk_mime_entity ($entity); | |
776097f1 | 554 | |
425ef29c DM |
555 | }); |
556 | }; | |
557 | ||
558 | my $err = $@; | |
559 | ||
560 | die $err if $err; | |
561 | ||
562 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
563 | ||
564 | return 1; | |
565 | ||
566 | } | |
567 | ||
568 | sub unpack_zip { | |
569 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
570 | ||
571 | my $size = 0; | |
572 | my $files = 0; | |
573 | ||
574 | my $timeout = $self->check_timeout(); | |
575 | ||
576 | eval { | |
577 | ||
578 | my $zip = Archive::Zip->new (); | |
579 | ||
580 | Archive::Zip::setErrorHandler (sub { die @_ }); | |
581 | ||
582 | run_with_timeout ($timeout, sub { | |
583 | ||
584 | my $status = $zip->read ($filename); | |
585 | die "unable to open zip file '$filename'" if $status != AZ_OK; | |
776097f1 | 586 | |
425ef29c DM |
587 | my $tid = 1; |
588 | foreach my $mem ($zip->members) { | |
589 | ||
590 | $files++; | |
591 | ||
592 | my $cm = $mem->compressionMethod(); | |
593 | die "unsupported zip compression method '$cm'\n" | |
776097f1 | 594 | if !(($cm == COMPRESSION_DEFLATED || |
425ef29c DM |
595 | $cm == COMPRESSION_STORED)); |
596 | ||
597 | die "encrypted archive detected\n" | |
598 | if $mem->isEncrypted(); | |
599 | ||
600 | my $us = $mem->uncompressedSize(); | |
601 | ||
602 | next if $us <= 0; # skip zero size files | |
603 | ||
604 | if ($mem->isDirectory) { | |
605 | $size += realsize ($us, 1); | |
606 | } else { | |
607 | $size += realsize ($us); | |
608 | } | |
609 | ||
610 | $self->check_comp_ratio ($filesize, $size); | |
611 | ||
612 | $self->check_quota ($files, $size, $csize); | |
613 | ||
614 | next if $mem->isDirectory; # skip dirs | |
615 | ||
616 | my $name = basename ($mem->fileName()); | |
617 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
618 | my $newfn = sprintf "$tmpdir/Z%08d_$name", $tid++; | |
619 | ||
620 | $self->add_glob_mime_type ($name); | |
621 | ||
622 | my $outfd = IO::File->new; | |
623 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { | |
776097f1 | 624 | die "unable to create file $newfn: $!"; |
425ef29c DM |
625 | } |
626 | ||
627 | my $ct; | |
628 | ||
629 | eval { | |
630 | ||
631 | $mem->desiredCompressionMethod (COMPRESSION_STORED); | |
632 | ||
633 | $status = $mem->rewindData(); | |
634 | ||
635 | die "unable to rewind zip stream" if $status != AZ_OK; | |
636 | ||
637 | my $outRef; | |
638 | my $bytes = 0; | |
639 | while ($status == AZ_OK) { | |
640 | ($outRef, $status) = $mem->readChunk(); | |
776097f1 | 641 | die "unable to read zip member" |
425ef29c DM |
642 | if ($status != AZ_OK && $status != AZ_STREAM_END); |
643 | ||
644 | my $len = length ($$outRef); | |
645 | if ($len > 0) { | |
646 | $ct = xdg_mime_get_mime_type_for_data ($$outRef, $len) if (!$bytes); | |
647 | $outfd->print ($$outRef) || die "write error during zip copy"; | |
648 | $bytes += $len; | |
649 | } | |
650 | ||
651 | last if $status == AZ_STREAM_END; | |
652 | } | |
653 | ||
776097f1 | 654 | $mem->endRead(); |
425ef29c DM |
655 | |
656 | $self->todo_list_add ($newfn, $ct, $bytes); | |
657 | ||
658 | }; | |
659 | ||
660 | my $err = $@; | |
661 | ||
662 | $outfd->close (); | |
663 | ||
664 | if ($err) { | |
665 | unlink $newfn; | |
666 | die $err; | |
667 | } | |
668 | } | |
669 | }); | |
670 | }; | |
671 | ||
672 | my $err = $@; | |
673 | ||
674 | die $err if $err; | |
675 | ||
676 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
677 | ||
678 | return 1; | |
679 | } | |
680 | ||
681 | sub unpack_tar { | |
682 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
683 | ||
684 | my $size = 0; | |
685 | my $files = 0; | |
686 | ||
687 | my $timeout = $self->check_timeout(); | |
688 | ||
689 | my $a = LibArchive::archive_read_new(); | |
690 | ||
691 | die "unable to create LibArchive object" if !$a; | |
692 | ||
693 | LibArchive::archive_read_support_format_all ($a); | |
694 | LibArchive::archive_read_support_compression_all ($a); | |
695 | ||
696 | eval { | |
697 | run_with_timeout ($timeout, sub { | |
698 | ||
699 | if ((my $r = LibArchive::archive_read_open_filename ($a, $filename, 10240))) { | |
776097f1 | 700 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); |
425ef29c DM |
701 | } |
702 | my $tid = 1; | |
703 | for (;;) { | |
704 | my $entry; | |
705 | my $r = LibArchive::archive_read_next_header ($a, $entry); | |
706 | ||
707 | last if ($r == LibArchive::ARCHIVE_EOF); | |
708 | ||
709 | if ($r != LibArchive::ARCHIVE_OK) { | |
710 | die "LibArchive error: %s", LibArchive::archive_error_string ($a); | |
711 | } | |
712 | ||
713 | my $us = LibArchive::archive_entry_size ($entry); | |
714 | my $mode = LibArchive::archive_entry_mode ($entry); | |
715 | ||
716 | my $rs; | |
717 | if (POSIX::S_ISREG ($mode)) { | |
718 | $rs = realsize ($us); | |
719 | } else { | |
720 | $rs = POSIX::S_ISDIR ($mode) ? realsize ($us, 1) : 256; | |
721 | } | |
722 | $size += $rs; | |
723 | $files += 1; | |
724 | ||
725 | $self->check_comp_ratio ($filesize, $size); | |
726 | ||
727 | $self->check_quota ($files, $size, $csize); | |
728 | ||
729 | next if POSIX::S_ISDIR ($mode); | |
730 | next if !POSIX::S_ISREG ($mode); | |
731 | ||
732 | my $name = basename (LibArchive::archive_entry_pathname ($entry)); | |
733 | $name =~ s|[^A-Za-z0-9\.]|-|g; | |
734 | my $newfn = sprintf "$tmpdir/A%08d_$name", $tid++; | |
735 | ||
736 | $self->add_glob_mime_type ($name); | |
737 | ||
738 | my $outfd; | |
739 | ||
740 | eval { | |
741 | my $bytes = 0; | |
742 | my $ct; | |
743 | my $todo = 1; | |
744 | ||
745 | if ($us > 0) { | |
746 | my $len; | |
747 | my $buf; | |
748 | while (($len = LibArchive::archive_read_data($a, $buf, 128*1024)) > 0) { | |
776097f1 | 749 | |
425ef29c DM |
750 | if (!$bytes) { |
751 | if ($ct = xdg_mime_get_mime_type_for_data ($buf, $len)) { | |
752 | $self->{mime}->{$ct} = 1; | |
753 | ||
754 | if (!is_archive ($ct)) { | |
755 | $todo = 0; | |
756 | last if $self->{ctonly}; | |
757 | } | |
758 | } | |
759 | } | |
776097f1 | 760 | |
425ef29c DM |
761 | $bytes += $len; |
762 | ||
763 | if (!$outfd) { # create only when needed | |
764 | $outfd = IO::File->new; | |
776097f1 | 765 | |
425ef29c DM |
766 | if (!$outfd->open ($newfn, O_CREAT|O_EXCL|O_WRONLY, 0640)) { |
767 | die "unable to create file $newfn: $!"; | |
768 | } | |
769 | } | |
770 | ||
771 | if (!$outfd->print ($buf)) { | |
772 | die "unable to write '$newfn' - $!"; | |
773 | } | |
774 | } | |
775 | ||
776 | die ("error reading archive (encrypted)\n") | |
777 | if ($len < 0); | |
778 | } | |
779 | ||
780 | $self->todo_list_add ($newfn, $ct, $bytes) if $todo; | |
781 | }; | |
782 | ||
783 | my $err = $@; | |
784 | ||
785 | $outfd->close () if $outfd; | |
776097f1 | 786 | |
425ef29c DM |
787 | if ($err) { |
788 | unlink $newfn; | |
789 | die $err; | |
790 | } | |
776097f1 | 791 | } |
425ef29c DM |
792 | }); |
793 | }; | |
794 | ||
795 | my $err = $@; | |
796 | ||
797 | LibArchive::archive_read_close($a); | |
798 | LibArchive::archive_read_finish($a); | |
776097f1 | 799 | |
425ef29c DM |
800 | die $err if $err; |
801 | ||
802 | $self->check_quota ($files, $size, $csize, 1); # commit sizes | |
803 | ||
804 | return 1; | |
805 | } | |
806 | ||
807 | sub generic_unpack { | |
808 | my ($self, $app, $filename, $tmpdir, $csize, $filesize) = @_; | |
809 | ||
810 | my $size = 0; | |
811 | my $files = 0; | |
812 | ||
813 | my $timeout = $self->check_timeout(); | |
814 | ||
815 | my @listcmd; | |
816 | my @restorecmd = ('/bin/false'); | |
817 | ||
776097f1 | 818 | my $filter; |
425ef29c DM |
819 | |
820 | if ($app eq 'tar') { | |
821 | @listcmd = ('/bin/tar', '-tvf', $filename); | |
822 | @restorecmd = ('/bin/tar', '-x', '--backup=number', "--transform='s,[^A-Za-z0-9\./],-,g'", '-o', | |
823 | '-m', '-C', $tmpdir, '-f', $filename); | |
824 | $filter = sub { | |
825 | my $line = shift; | |
826 | if ($line =~ m/^(\S)\S+\s+\S+\s+([\d,\.]+)\s+\S+/) { | |
827 | my ($type, $bytes) = ($1, $2); | |
828 | $bytes =~ s/[,\.]//g; | |
829 | ||
830 | if ($type eq 'd') { | |
831 | $bytes = realsize ($bytes, 1); | |
832 | } elsif ($type eq '-') { | |
833 | $bytes = realsize ($bytes); | |
834 | } else { | |
835 | $bytes = 256; # simple assumption | |
836 | } | |
837 | ||
838 | $size += $bytes; | |
839 | $files++; | |
840 | ||
841 | $self->check_comp_ratio ($filesize, $size); | |
842 | $self->check_quota ($files, $size, $csize); | |
843 | ||
844 | } else { | |
845 | die "can't parse tar output: $line\n"; | |
846 | } | |
847 | } | |
848 | } elsif ($app eq '7z' || $app eq '7zsimple') { | |
849 | # Note: set password to 'none' with '-pnone', to avoid reading from /dev/tty | |
850 | @restorecmd = ('/usr/bin/7z', 'e', '-pnone', '-bd', '-y', '-aou', "-w$self->{tmpdir}", "-o$tmpdir", $filename); | |
851 | ||
852 | @listcmd = ('/usr/bin/7z', 'l', '-slt', $filename); | |
853 | ||
854 | my ($path, $folder, $bytes); | |
855 | ||
856 | $filter = sub { | |
857 | my $line = shift; | |
858 | chomp $line; | |
859 | ||
860 | if ($line =~ m/^\s*\z/) { | |
861 | if (defined ($path) && defined ($bytes)) { | |
862 | $bytes = realsize ($bytes, $folder); | |
863 | $size += $bytes; | |
864 | $files++; | |
865 | ||
866 | $self->check_comp_ratio ($filesize, $size); | |
867 | $self->check_quota ($files, $size, $csize); | |
868 | } | |
869 | undef $path; | |
870 | undef $folder; | |
871 | undef $bytes; | |
776097f1 | 872 | |
425ef29c DM |
873 | } elsif ($line =~ m/^Path = (.*)\z/s) { |
874 | $path = $1; | |
875 | } elsif ($line =~ m/^Size = (\d+)\z/s) { | |
876 | $bytes = $1; | |
877 | } elsif ($line =~ m/^Folder = (\d+)\z/s) { | |
878 | $folder = $1; | |
879 | } elsif ($line =~ m/^Attributes = ([D\.][R\.][H\.][S\.][A\.])\z/s) { | |
880 | $folder = 1 if $1 && substr ($1, 0, 1) eq 'D'; | |
881 | } | |
882 | }; | |
883 | ||
884 | } elsif ($app eq 'tnef') { | |
885 | @listcmd = ('/usr/bin/tnef', '-tv', '-f', $filename); | |
886 | @restorecmd = ('/usr/bin/tnef', '-C', $tmpdir, '--number-backups', '-f', $filename); | |
887 | ||
888 | $filter = sub { | |
889 | my $line = shift; | |
890 | chomp $line; | |
891 | ||
892 | if ($line =~ m!^\s*(\d+)\s*|\s*\d{4}/\d{1,2}/\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2}\s*|!) { | |
893 | my $bytes = $1; | |
894 | ||
895 | $bytes = realsize ($bytes); | |
896 | $size += $bytes; | |
897 | $files++; | |
898 | ||
899 | $self->check_comp_ratio ($filesize, $size); | |
900 | $self->check_quota ($files, $size, $csize); | |
901 | } else { | |
902 | die "can't parse tnef output\n"; | |
903 | } | |
904 | ||
905 | }; | |
906 | ||
907 | } else { | |
908 | die "unknown application '$app'"; | |
909 | } | |
910 | ||
911 | eval { | |
912 | ||
913 | my $cfh = IO::File->new(); | |
914 | my $pid = helper_pipe_open ($cfh, '/dev/null', '/dev/null', @listcmd); | |
776097f1 | 915 | |
425ef29c DM |
916 | helper_pipe_consume ($cfh, $pid, $timeout, 0, $filter); |
917 | }; | |
918 | ||
919 | my $err = $@; | |
920 | ||
921 | die $err if $err; | |
922 | ||
923 | return if !$files; # empty archive | |
924 | ||
925 | $self->check_quota ($files, $size, $csize, 1); | |
926 | ||
927 | $timeout = $self->check_timeout(); | |
928 | ||
929 | my $cfh = IO::File->new(); | |
930 | my $pid = helper_pipe_open ($cfh, '/dev/null', undef, @restorecmd); | |
931 | helper_pipe_consume ($cfh, $pid, $timeout, 0, sub { | |
932 | my $line = shift; | |
933 | print "$app: $line" if $self->{debug}; | |
934 | }); | |
935 | ||
936 | return 1; | |
937 | } | |
938 | ||
939 | sub unpack_dir { | |
940 | my ($self, $dirname, $level) = @_; | |
941 | ||
942 | local (*DIR); | |
943 | ||
944 | print "unpack dir '$dirname'\n" if $self->{debug}; | |
945 | ||
946 | opendir(DIR, $dirname) || die "can't opendir $dirname: $!"; | |
947 | ||
948 | my $name; | |
949 | ||
776097f1 | 950 | while (defined ($name = readdir (DIR))) { |
425ef29c DM |
951 | my $path = "$dirname/$name"; |
952 | my $st = lstat ($path); | |
953 | ||
954 | if (!$st) { | |
955 | die "no such file '$path' - $!"; | |
956 | } elsif (POSIX::S_ISDIR ($st->mode)) { | |
957 | next if ($name eq '.' || $name eq '..'); | |
958 | $self->unpack_dir ($path, $level); | |
959 | } elsif (POSIX::S_ISREG ($st->mode)) { | |
960 | my $size = $st->size; | |
961 | $self->__unpack_archive ($path, $level + 1, $size); | |
962 | } | |
963 | } | |
964 | ||
965 | closedir DIR; | |
966 | } | |
967 | ||
968 | sub unpack_todo { | |
969 | my ($self, $level) = @_; | |
970 | ||
971 | my $ta = $self->{todo}; | |
972 | $self->{todo} = []; | |
973 | ||
974 | foreach my $todo (@$ta) { | |
975 | $self->__unpack_archive ($todo->[0], $level, $todo->[2], $todo->[1]); | |
976 | } | |
977 | } | |
978 | ||
979 | sub __unpack_archive { | |
980 | my ($self, $filename, $level, $size, $ct) = @_; | |
981 | ||
982 | $level = 0 if !$level; | |
983 | ||
776097f1 | 984 | $self->{levels} = max2($self->{levels}, $level); |
425ef29c DM |
985 | |
986 | if ($self->{maxrec} && ($level >= $self->{maxrec})) { | |
776097f1 | 987 | return if $self->{maxrec_soft}; |
425ef29c DM |
988 | die "max recursion limit reached\n"; |
989 | } | |
990 | ||
991 | die "undefined file size" if !defined ($size); | |
992 | ||
993 | return if !$size; # nothing to do | |
994 | ||
995 | if (!$ct) { | |
91a5af02 DM |
996 | $ct = PMG::Utils::magic_mime_type_for_file($filename); |
997 | $self->add_glob_mime_type($filename); | |
425ef29c DM |
998 | } |
999 | ||
1000 | if ($ct) { | |
1001 | $self->{mime}->{$ct} = 1; | |
1002 | ||
776097f1 | 1003 | if (defined($decompressors->{$ct})) { |
425ef29c DM |
1004 | |
1005 | my ($app, $code) = @{$decompressors->{$ct}}; | |
1006 | ||
1007 | if ($app) { | |
1008 | ||
1009 | # we try to keep extension correctly | |
776097f1 DM |
1010 | my $tmp = basename($filename); |
1011 | ($ct eq 'application/gzip') && | |
425ef29c | 1012 | $tmp =~ s/\.gz\z//; |
776097f1 | 1013 | ($ct eq 'application/x-bzip') && |
425ef29c | 1014 | $tmp =~ s/\.bz2?\z//; |
776097f1 | 1015 | ($ct eq 'application/x-compress') && |
425ef29c | 1016 | $tmp =~ s/\.Z\z//; |
776097f1 | 1017 | ($ct eq 'application/x-compressed-tar') && |
425ef29c | 1018 | $tmp =~ s/\.gz\z// || $tmp =~ s/\.tgz\z/.tar/; |
776097f1 | 1019 | ($ct eq 'application/x-bzip-compressed-tar') && |
425ef29c DM |
1020 | $tmp =~ s/\.bz2?\z// || $tmp =~ s/\.tbz\z/.tar/; |
1021 | ($ct eq 'application/x-tarz') && | |
1022 | $tmp =~ s/\.Z\z//; | |
1023 | ||
776097f1 | 1024 | my $newname = sprintf "%s/DC_%08d_%s", $self->{tmpdir}, ++$self->{ufid}, $tmp; |
425ef29c DM |
1025 | |
1026 | print "Decomp: $filename\n\t($ct) with $app to $newname\n" | |
776097f1 | 1027 | if $self->{debug}; |
425ef29c | 1028 | |
776097f1 | 1029 | if (my $res = &$code($self, $app, $filename, $newname, $level ? $size : 0, $size)) { |
425ef29c DM |
1030 | unlink $filename if $level; |
1031 | $self->unpack_todo ($level + 1); | |
1032 | } | |
1033 | } | |
1034 | } elsif (defined ($unpackers->{$ct})) { | |
1035 | ||
1036 | my ($app, $code, $ctdetect) = @{$unpackers->{$ct}}; | |
1037 | ||
1038 | if ($app) { | |
1039 | ||
776097f1 | 1040 | my $tmpdir = sprintf "%s/DIR_%08d", $self->{tmpdir}, ++$self->{udid}; |
425ef29c | 1041 | mkdir $tmpdir; |
776097f1 | 1042 | |
425ef29c DM |
1043 | print "Unpack: $filename\n\t($ct) with $app to $tmpdir\n" |
1044 | if $self->{debug}; | |
1045 | ||
1046 | if (my $res = &$code ($self, $app, $filename, $tmpdir, $level ? $size : 0, $size)) { | |
1047 | unlink $filename if $level; | |
1048 | ||
1049 | if ($ctdetect) { | |
1050 | $self->unpack_todo ($level + 1); | |
1051 | } else { | |
1052 | $self->unpack_dir ($tmpdir, $level); | |
1053 | } | |
1054 | } | |
1055 | } | |
1056 | } | |
1057 | } | |
1058 | } | |
1059 | ||
1060 | sub is_archive { | |
776097f1 | 1061 | my ($ct) = @_; |
425ef29c | 1062 | |
776097f1 | 1063 | return defined($decompressors->{$ct}) || defined($unpackers->{$ct}); |
425ef29c DM |
1064 | } |
1065 | ||
1066 | # unpack_archive | |
1067 | # | |
1068 | # Description: unpacks an archive and records containing | |
1069 | # content types (detected by magic numbers and file extension) | |
776097f1 DM |
1070 | # Extracted files are stored inside 'tempdir'. |
1071 | # | |
425ef29c DM |
1072 | # returns: true if file is archive, undef otherwhise |
1073 | ||
1074 | sub unpack_archive { | |
1075 | my ($self, $filename, $ct) = @_; | |
1076 | ||
776097f1 | 1077 | my $st = lstat($filename); |
425ef29c DM |
1078 | my $size = 0; |
1079 | ||
1080 | if (!$st) { | |
1081 | die "no such file '$filename' - $!"; | |
776097f1 | 1082 | } elsif (POSIX::S_ISREG($st->mode)) { |
425ef29c DM |
1083 | $size = $st->size; |
1084 | ||
1085 | return if !$size; # do nothing | |
1086 | ||
1087 | $self->{quota} = $self->{maxquota} - $self->{size}; | |
1088 | ||
1089 | $self->{ratioquota} = $size * $self->{maxratio} if $self->{maxratio}; | |
776097f1 | 1090 | |
425ef29c DM |
1091 | } else { |
1092 | return; # do nothing | |
1093 | } | |
776097f1 | 1094 | |
91a5af02 | 1095 | $ct = PMG::Utils::magic_mime_type_for_file($filename) if !$ct; |
425ef29c | 1096 | |
776097f1 | 1097 | return if (!$ct || !is_archive($ct)); # not an archive |
425ef29c DM |
1098 | |
1099 | eval { | |
776097f1 | 1100 | $self->__unpack_archive($filename, 0, $st->size, $ct); |
425ef29c DM |
1101 | }; |
1102 | ||
1103 | my $err = $@; | |
1104 | ||
776097f1 | 1105 | printf "ELAPSED: %.2f ms $filename\n", |
425ef29c DM |
1106 | int(tv_interval ($self->{starttime}) * 1000) |
1107 | if $self->{debug}; | |
1108 | ||
1109 | if ($err) { | |
776097f1 | 1110 | $self->{mime}->{'proxmox/unreadable-archive'} = 1; |
425ef29c DM |
1111 | die $err; |
1112 | } | |
1113 | return 1; | |
1114 | } | |
1115 | ||
1116 | 1; | |
1117 | __END__ |