]>
Commit | Line | Data |
---|---|---|
92f5a8d4 TL |
1 | /*-----------------------------------------------------------*/ |
2 | /*--- Block recoverer program for bzip2 ---*/ | |
3 | /*--- bzip2recover.c ---*/ | |
4 | /*-----------------------------------------------------------*/ | |
5 | ||
6 | /* ------------------------------------------------------------------ | |
7 | This file is part of bzip2/libbzip2, a program and library for | |
8 | lossless, block-sorting data compression. | |
9 | ||
10 | bzip2/libbzip2 version 1.0.6 of 6 September 2010 | |
11 | Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> | |
12 | ||
13 | Please read the WARNING, DISCLAIMER and PATENTS sections in the | |
14 | README file. | |
15 | ||
16 | This program is released under the terms of the license contained | |
17 | in the file LICENSE. | |
18 | ------------------------------------------------------------------ */ | |
19 | ||
20 | /* This program is a complete hack and should be rewritten properly. | |
21 | It isn't very complicated. */ | |
22 | ||
23 | #include <stdio.h> | |
24 | #include <errno.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | ||
28 | ||
29 | /* This program records bit locations in the file to be recovered. | |
30 | That means that if 64-bit ints are not supported, we will not | |
31 | be able to recover .bz2 files over 512MB (2^32 bits) long. | |
32 | On GNU supported platforms, we take advantage of the 64-bit | |
33 | int support to circumvent this problem. Ditto MSVC. | |
34 | ||
35 | This change occurred in version 1.0.2; all prior versions have | |
36 | the 512MB limitation. | |
37 | */ | |
/* Select the widest unsigned integer type the compiler offers for
   holding bit offsets, together with the printf conversion that
   matches it. */
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "%llu" is the standard conversion for unsigned long long; the
      "L" length modifier is only defined for floating conversions. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No known 64-bit type: fall back to unsigned int. */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
50 | ||
/* Scalar type aliases used throughout this file. */
typedef  unsigned char  UChar;
typedef  unsigned char  Bool;
typedef  char           Char;
typedef  int            Int32;
typedef  unsigned int   UInt32;

/* Truth values for Bool. */
#define False ((Bool)0)
#define True  ((Bool)1)
58 | ||
59 | ||
/* Capacity, in bytes, of each of the file-name buffers below. */
#define BZ_MAX_FILENAME 2000

/* Name of the damaged input file, copied from argv[1] in main. */
Char inFileName[BZ_MAX_FILENAME];
/* Name of the recovered-block file currently being written; rebuilt
   in main for every block. */
Char outFileName[BZ_MAX_FILENAME];
/* Program name (from argv[0]) used as the prefix of every message. */
Char progName[BZ_MAX_FILENAME];

/* Count of bytes emitted through the bit-stream writer. */
MaybeUInt64 bytesOut = 0;
/* Input byte counter; initialised here. */
MaybeUInt64 bytesIn = 0;
68 | ||
69 | ||
70 | /*---------------------------------------------------*/ | |
71 | /*--- Header bytes ---*/ | |
72 | /*---------------------------------------------------*/ | |
73 | ||
/* Bytes of the stream header that main writes at the start of every
   recovered file: 'B', 'Z', 'h', then BZ_HDR_0 + 9. */
#define BZ_HDR_B 0x42 /* 'B' */
#define BZ_HDR_Z 0x5a /* 'Z' */
#define BZ_HDR_h 0x68 /* 'h' */
#define BZ_HDR_0 0x30 /* '0' */
78 | ||
79 | ||
80 | /*---------------------------------------------------*/ | |
81 | /*--- I/O errors ---*/ | |
82 | /*---------------------------------------------------*/ | |
83 | ||
84 | /*---------------------------------------------*/ | |
85 | static void readError ( void ) | |
86 | { | |
87 | fprintf ( stderr, | |
88 | "%s: I/O error reading `%s', possible reason follows.\n", | |
89 | progName, inFileName ); | |
90 | perror ( progName ); | |
91 | fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", | |
92 | progName ); | |
93 | exit ( 1 ); | |
94 | } | |
95 | ||
96 | ||
97 | /*---------------------------------------------*/ | |
98 | static void writeError ( void ) | |
99 | { | |
100 | fprintf ( stderr, | |
101 | "%s: I/O error reading `%s', possible reason follows.\n", | |
102 | progName, inFileName ); | |
103 | perror ( progName ); | |
104 | fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", | |
105 | progName ); | |
106 | exit ( 1 ); | |
107 | } | |
108 | ||
109 | ||
110 | /*---------------------------------------------*/ | |
111 | static void mallocFail ( Int32 n ) | |
112 | { | |
113 | fprintf ( stderr, | |
114 | "%s: malloc failed on request for %d bytes.\n", | |
115 | progName, n ); | |
116 | fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", | |
117 | progName ); | |
118 | exit ( 1 ); | |
119 | } | |
120 | ||
121 | ||
122 | /*---------------------------------------------*/ | |
123 | static void tooManyBlocks ( Int32 max_handled_blocks ) | |
124 | { | |
125 | fprintf ( stderr, | |
126 | "%s: `%s' appears to contain more than %d blocks\n", | |
127 | progName, inFileName, max_handled_blocks ); | |
128 | fprintf ( stderr, | |
129 | "%s: and cannot be handled. To fix, increase\n", | |
130 | progName ); | |
131 | fprintf ( stderr, | |
132 | "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n", | |
133 | progName ); | |
134 | exit ( 1 ); | |
135 | } | |
136 | ||
137 | ||
138 | ||
139 | /*---------------------------------------------------*/ | |
140 | /*--- Bit stream I/O ---*/ | |
141 | /*---------------------------------------------------*/ | |
142 | ||
143 | typedef | |
144 | struct { | |
145 | FILE* handle; | |
146 | Int32 buffer; | |
147 | Int32 buffLive; | |
148 | Char mode; | |
149 | } | |
150 | BitStream; | |
151 | ||
152 | ||
153 | /*---------------------------------------------*/ | |
154 | static BitStream* bsOpenReadStream ( FILE* stream ) | |
155 | { | |
156 | BitStream *bs = malloc ( sizeof(BitStream) ); | |
157 | if (bs == NULL) mallocFail ( sizeof(BitStream) ); | |
158 | bs->handle = stream; | |
159 | bs->buffer = 0; | |
160 | bs->buffLive = 0; | |
161 | bs->mode = 'r'; | |
162 | return bs; | |
163 | } | |
164 | ||
165 | ||
166 | /*---------------------------------------------*/ | |
167 | static BitStream* bsOpenWriteStream ( FILE* stream ) | |
168 | { | |
169 | BitStream *bs = malloc ( sizeof(BitStream) ); | |
170 | if (bs == NULL) mallocFail ( sizeof(BitStream) ); | |
171 | bs->handle = stream; | |
172 | bs->buffer = 0; | |
173 | bs->buffLive = 0; | |
174 | bs->mode = 'w'; | |
175 | return bs; | |
176 | } | |
177 | ||
178 | ||
179 | /*---------------------------------------------*/ | |
180 | static void bsPutBit ( BitStream* bs, Int32 bit ) | |
181 | { | |
182 | if (bs->buffLive == 8) { | |
183 | Int32 retVal = putc ( (UChar) bs->buffer, bs->handle ); | |
184 | if (retVal == EOF) writeError(); | |
185 | bytesOut++; | |
186 | bs->buffLive = 1; | |
187 | bs->buffer = bit & 0x1; | |
188 | } else { | |
189 | bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) ); | |
190 | bs->buffLive++; | |
191 | }; | |
192 | } | |
193 | ||
194 | ||
195 | /*---------------------------------------------*/ | |
196 | /*-- | |
197 | Returns 0 or 1, or 2 to indicate EOF. | |
198 | --*/ | |
199 | static Int32 bsGetBit ( BitStream* bs ) | |
200 | { | |
201 | if (bs->buffLive > 0) { | |
202 | bs->buffLive --; | |
203 | return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); | |
204 | } else { | |
205 | Int32 retVal = getc ( bs->handle ); | |
206 | if ( retVal == EOF ) { | |
207 | if (errno != 0) readError(); | |
208 | return 2; | |
209 | } | |
210 | bs->buffLive = 7; | |
211 | bs->buffer = retVal; | |
212 | return ( ((bs->buffer) >> 7) & 0x1 ); | |
213 | } | |
214 | } | |
215 | ||
216 | ||
217 | /*---------------------------------------------*/ | |
218 | static void bsClose ( BitStream* bs ) | |
219 | { | |
220 | Int32 retVal; | |
221 | ||
222 | if ( bs->mode == 'w' ) { | |
223 | while ( bs->buffLive < 8 ) { | |
224 | bs->buffLive++; | |
225 | bs->buffer <<= 1; | |
226 | }; | |
227 | retVal = putc ( (UChar) (bs->buffer), bs->handle ); | |
228 | if (retVal == EOF) writeError(); | |
229 | bytesOut++; | |
230 | retVal = fflush ( bs->handle ); | |
231 | if (retVal == EOF) writeError(); | |
232 | } | |
233 | retVal = fclose ( bs->handle ); | |
234 | if (retVal == EOF) { | |
235 | if (bs->mode == 'w') writeError(); else readError(); | |
236 | } | |
237 | free ( bs ); | |
238 | } | |
239 | ||
240 | ||
241 | /*---------------------------------------------*/ | |
242 | static void bsPutUChar ( BitStream* bs, UChar c ) | |
243 | { | |
244 | Int32 i; | |
245 | for (i = 7; i >= 0; i--) | |
246 | bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); | |
247 | } | |
248 | ||
249 | ||
250 | /*---------------------------------------------*/ | |
251 | static void bsPutUInt32 ( BitStream* bs, UInt32 c ) | |
252 | { | |
253 | Int32 i; | |
254 | ||
255 | for (i = 31; i >= 0; i--) | |
256 | bsPutBit ( bs, (c >> i) & 0x1 ); | |
257 | } | |
258 | ||
259 | ||
260 | /*---------------------------------------------*/ | |
261 | static Bool endsInBz2 ( Char* name ) | |
262 | { | |
263 | Int32 n = strlen ( name ); | |
264 | if (n <= 4) return False; | |
265 | return | |
266 | (name[n-4] == '.' && | |
267 | name[n-3] == 'b' && | |
268 | name[n-2] == 'z' && | |
269 | name[n-1] == '2'); | |
270 | } | |
271 | ||
272 | ||
273 | /*---------------------------------------------------*/ | |
274 | /*--- ---*/ | |
275 | /*---------------------------------------------------*/ | |
276 | ||
277 | /* This logic isn't really right when it comes to Cygwin. */ | |
/* Directory separator that main looks for when it splits the input
   path into directory and base name. */
#ifdef _WIN32
# define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */
#else
# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
#endif

/* 48-bit pattern that main scans for as the start of a block, held
   as the upper 16 bits (HI) and the lower 32 bits (LO). */
#define BLOCK_HEADER_HI 0x00003141UL
#define BLOCK_HEADER_LO 0x59265359UL

/* Second 48-bit pattern that main treats as a block boundary, split
   the same way; its bytes are also what main writes after the data
   of every recovered block. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Increase if necessary. However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000

/* Bit offsets recorded by the scanning pass in main: bStart/bEnd
   hold the start and end of every candidate block, rbStart/rbEnd
   those of the blocks that were accepted for recovery. */
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
300 | ||
301 | Int32 main ( Int32 argc, Char** argv ) | |
302 | { | |
303 | FILE* inFile; | |
304 | FILE* outFile; | |
305 | BitStream* bsIn, *bsWr; | |
306 | Int32 b, wrBlock, currBlock, rbCtr; | |
307 | MaybeUInt64 bitsRead; | |
308 | ||
309 | UInt32 buffHi, buffLo, blockCRC; | |
310 | Char* p; | |
311 | ||
312 | strcpy ( progName, argv[0] ); | |
313 | inFileName[0] = outFileName[0] = 0; | |
314 | ||
315 | fprintf ( stderr, | |
316 | "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" ); | |
317 | ||
318 | if (argc != 2) { | |
319 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", | |
320 | progName, progName ); | |
321 | switch (sizeof(MaybeUInt64)) { | |
322 | case 8: | |
323 | fprintf(stderr, | |
324 | "\trestrictions on size of recovered file: None\n"); | |
325 | break; | |
326 | case 4: | |
327 | fprintf(stderr, | |
328 | "\trestrictions on size of recovered file: 512 MB\n"); | |
329 | fprintf(stderr, | |
330 | "\tto circumvent, recompile with MaybeUInt64 as an\n" | |
331 | "\tunsigned 64-bit int.\n"); | |
332 | break; | |
333 | default: | |
334 | fprintf(stderr, | |
335 | "\tsizeof(MaybeUInt64) is not 4 or 8 -- " | |
336 | "configuration error.\n"); | |
337 | break; | |
338 | } | |
339 | exit(1); | |
340 | } | |
341 | ||
342 | if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { | |
343 | fprintf ( stderr, | |
344 | "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", | |
345 | progName, (int)strlen(argv[1]) ); | |
346 | exit(1); | |
347 | } | |
348 | ||
349 | strcpy ( inFileName, argv[1] ); | |
350 | ||
351 | inFile = fopen ( inFileName, "rb" ); | |
352 | if (inFile == NULL) { | |
353 | fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); | |
354 | exit(1); | |
355 | } | |
356 | ||
357 | bsIn = bsOpenReadStream ( inFile ); | |
358 | fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); | |
359 | ||
360 | bitsRead = 0; | |
361 | buffHi = buffLo = 0; | |
362 | currBlock = 0; | |
363 | bStart[currBlock] = 0; | |
364 | ||
365 | rbCtr = 0; | |
366 | ||
367 | while (True) { | |
368 | b = bsGetBit ( bsIn ); | |
369 | bitsRead++; | |
370 | if (b == 2) { | |
371 | if (bitsRead >= bStart[currBlock] && | |
372 | (bitsRead - bStart[currBlock]) >= 40) { | |
373 | bEnd[currBlock] = bitsRead-1; | |
374 | if (currBlock > 0) | |
375 | fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT | |
376 | " to " MaybeUInt64_FMT " (incomplete)\n", | |
377 | currBlock, bStart[currBlock], bEnd[currBlock] ); | |
378 | } else | |
379 | currBlock--; | |
380 | break; | |
381 | } | |
382 | buffHi = (buffHi << 1) | (buffLo >> 31); | |
383 | buffLo = (buffLo << 1) | (b & 1); | |
384 | if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI | |
385 | && buffLo == BLOCK_HEADER_LO) | |
386 | || | |
387 | ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI | |
388 | && buffLo == BLOCK_ENDMARK_LO) | |
389 | ) { | |
390 | if (bitsRead > 49) { | |
391 | bEnd[currBlock] = bitsRead-49; | |
392 | } else { | |
393 | bEnd[currBlock] = 0; | |
394 | } | |
395 | if (currBlock > 0 && | |
396 | (bEnd[currBlock] - bStart[currBlock]) >= 130) { | |
397 | fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT | |
398 | " to " MaybeUInt64_FMT "\n", | |
399 | rbCtr+1, bStart[currBlock], bEnd[currBlock] ); | |
400 | rbStart[rbCtr] = bStart[currBlock]; | |
401 | rbEnd[rbCtr] = bEnd[currBlock]; | |
402 | rbCtr++; | |
403 | } | |
404 | if (currBlock >= BZ_MAX_HANDLED_BLOCKS) | |
405 | tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); | |
406 | currBlock++; | |
407 | ||
408 | bStart[currBlock] = bitsRead; | |
409 | } | |
410 | } | |
411 | ||
412 | bsClose ( bsIn ); | |
413 | ||
414 | /*-- identified blocks run from 1 to rbCtr inclusive. --*/ | |
415 | ||
416 | if (rbCtr < 1) { | |
417 | fprintf ( stderr, | |
418 | "%s: sorry, I couldn't find any block boundaries.\n", | |
419 | progName ); | |
420 | exit(1); | |
421 | }; | |
422 | ||
423 | fprintf ( stderr, "%s: splitting into blocks\n", progName ); | |
424 | ||
425 | inFile = fopen ( inFileName, "rb" ); | |
426 | if (inFile == NULL) { | |
427 | fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); | |
428 | exit(1); | |
429 | } | |
430 | bsIn = bsOpenReadStream ( inFile ); | |
431 | ||
432 | /*-- placate gcc's dataflow analyser --*/ | |
433 | blockCRC = 0; bsWr = 0; | |
434 | ||
435 | bitsRead = 0; | |
436 | outFile = NULL; | |
437 | wrBlock = 0; | |
438 | while (True) { | |
439 | b = bsGetBit(bsIn); | |
440 | if (b == 2) break; | |
441 | buffHi = (buffHi << 1) | (buffLo >> 31); | |
442 | buffLo = (buffLo << 1) | (b & 1); | |
443 | if (bitsRead == 47+rbStart[wrBlock]) | |
444 | blockCRC = (buffHi << 16) | (buffLo >> 16); | |
445 | ||
446 | if (outFile != NULL && bitsRead >= rbStart[wrBlock] | |
447 | && bitsRead <= rbEnd[wrBlock]) { | |
448 | bsPutBit ( bsWr, b ); | |
449 | } | |
450 | ||
451 | bitsRead++; | |
452 | ||
453 | if (bitsRead == rbEnd[wrBlock]+1) { | |
454 | if (outFile != NULL) { | |
455 | bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); | |
456 | bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); | |
457 | bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); | |
458 | bsPutUInt32 ( bsWr, blockCRC ); | |
459 | bsClose ( bsWr ); | |
460 | } | |
461 | if (wrBlock >= rbCtr) break; | |
462 | wrBlock++; | |
463 | } else | |
464 | if (bitsRead == rbStart[wrBlock]) { | |
465 | /* Create the output file name, correctly handling leading paths. | |
466 | (31.10.2001 by Sergey E. Kusikov) */ | |
467 | Char* split; | |
468 | Int32 ofs, k; | |
469 | for (k = 0; k < BZ_MAX_FILENAME; k++) | |
470 | outFileName[k] = 0; | |
471 | strcpy (outFileName, inFileName); | |
472 | split = strrchr (outFileName, BZ_SPLIT_SYM); | |
473 | if (split == NULL) { | |
474 | split = outFileName; | |
475 | } else { | |
476 | ++split; | |
477 | } | |
478 | /* Now split points to the start of the basename. */ | |
479 | ofs = split - outFileName; | |
480 | sprintf (split, "rec%5d", wrBlock+1); | |
481 | for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; | |
482 | strcat (outFileName, inFileName + ofs); | |
483 | ||
484 | if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); | |
485 | ||
486 | fprintf ( stderr, " writing block %d to `%s' ...\n", | |
487 | wrBlock+1, outFileName ); | |
488 | ||
489 | outFile = fopen ( outFileName, "wb" ); | |
490 | if (outFile == NULL) { | |
491 | fprintf ( stderr, "%s: can't write `%s'\n", | |
492 | progName, outFileName ); | |
493 | exit(1); | |
494 | } | |
495 | bsWr = bsOpenWriteStream ( outFile ); | |
496 | bsPutUChar ( bsWr, BZ_HDR_B ); | |
497 | bsPutUChar ( bsWr, BZ_HDR_Z ); | |
498 | bsPutUChar ( bsWr, BZ_HDR_h ); | |
499 | bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); | |
500 | bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); | |
501 | bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); | |
502 | bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); | |
503 | } | |
504 | } | |
505 | ||
506 | fprintf ( stderr, "%s: finished\n", progName ); | |
507 | return 0; | |
508 | } | |
509 | ||
510 | ||
511 | ||
512 | /*-----------------------------------------------------------*/ | |
513 | /*--- end bzip2recover.c ---*/ | |
514 | /*-----------------------------------------------------------*/ |