4 Based on LZMA SDK 4.65:
5 LzFind.c -- Match finder for LZ algorithms
6 2008-10-04 : Igor Pavlov : Public domain
8 Copyright (c) 2009, Intel Corporation<BR>
9 All rights reserved. This program and the accompanying materials
10 are licensed and made available under the terms and conditions of the BSD License
11 which accompanies this distribution. The full text of the license may be found at
12 http://opensource.org/licenses/bsd-license.php
14 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
15 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
28 #define kEmptyHashValue 0
29 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
30 #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
31 #define kNormalizeMask (~(kNormalizeStepMin - 1))
32 #define kMaxHistorySize ((UInt32)3 << 30)
34 #define kStartMaxLen 3
36 static void LzInWindow_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
40 alloc
->Free(alloc
, p
->bufferBase
);
45 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
47 static int LzInWindow_Create(CMatchFinder
*p
, UInt32 keepSizeReserv
, ISzAlloc
*alloc
)
49 UInt32 blockSize
= p
->keepSizeBefore
+ p
->keepSizeAfter
+ keepSizeReserv
;
52 p
->blockSize
= blockSize
;
55 if (p
->bufferBase
== 0 || p
->blockSize
!= blockSize
)
57 LzInWindow_Free(p
, alloc
);
58 p
->blockSize
= blockSize
;
59 p
->bufferBase
= (Byte
*)alloc
->Alloc(alloc
, (size_t)blockSize
);
61 return (p
->bufferBase
!= 0);
64 Byte
*MatchFinder_GetPointerToCurrentPos(CMatchFinder
*p
) { return p
->buffer
; }
65 Byte
MatchFinder_GetIndexByte(CMatchFinder
*p
, Int32 index
) { return p
->buffer
[index
]; }
67 UInt32
MatchFinder_GetNumAvailableBytes(CMatchFinder
*p
) { return p
->streamPos
- p
->pos
; }
69 void MatchFinder_ReduceOffsets(CMatchFinder
*p
, UInt32 subValue
)
71 p
->posLimit
-= subValue
;
73 p
->streamPos
-= subValue
;
76 static void MatchFinder_ReadBlock(CMatchFinder
*p
)
78 if (p
->streamEndWasReached
|| p
->result
!= SZ_OK
)
82 Byte
*dest
= p
->buffer
+ (p
->streamPos
- p
->pos
);
83 size_t size
= (p
->bufferBase
+ p
->blockSize
- dest
);
86 p
->result
= p
->stream
->Read(p
->stream
, dest
, &size
);
87 if (p
->result
!= SZ_OK
)
91 p
->streamEndWasReached
= 1;
94 p
->streamPos
+= (UInt32
)size
;
95 if (p
->streamPos
- p
->pos
> p
->keepSizeAfter
)
100 void MatchFinder_MoveBlock(CMatchFinder
*p
)
102 memmove(p
->bufferBase
,
103 p
->buffer
- p
->keepSizeBefore
,
104 (size_t)(p
->streamPos
- p
->pos
+ p
->keepSizeBefore
));
105 p
->buffer
= p
->bufferBase
+ p
->keepSizeBefore
;
108 int MatchFinder_NeedMove(CMatchFinder
*p
)
110 /* if (p->streamEndWasReached) return 0; */
111 return ((size_t)(p
->bufferBase
+ p
->blockSize
- p
->buffer
) <= p
->keepSizeAfter
);
114 void MatchFinder_ReadIfRequired(CMatchFinder
*p
)
116 if (p
->streamEndWasReached
)
118 if (p
->keepSizeAfter
>= p
->streamPos
- p
->pos
)
119 MatchFinder_ReadBlock(p
);
122 static void MatchFinder_CheckAndMoveAndRead(CMatchFinder
*p
)
124 if (MatchFinder_NeedMove(p
))
125 MatchFinder_MoveBlock(p
);
126 MatchFinder_ReadBlock(p
);
129 static void MatchFinder_SetDefaultSettings(CMatchFinder
*p
)
134 /* p->skipModeBits = 0; */
139 #define kCrcPoly 0xEDB88320
141 void MatchFinder_Construct(CMatchFinder
*p
)
147 MatchFinder_SetDefaultSettings(p
);
149 for (i
= 0; i
< 256; i
++)
153 for (j
= 0; j
< 8; j
++)
154 r
= (r
>> 1) ^ (kCrcPoly
& ~((r
& 1) - 1));
159 static void MatchFinder_FreeThisClassMemory(CMatchFinder
*p
, ISzAlloc
*alloc
)
161 alloc
->Free(alloc
, p
->hash
);
165 void MatchFinder_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
167 MatchFinder_FreeThisClassMemory(p
, alloc
);
168 LzInWindow_Free(p
, alloc
);
171 static CLzRef
* AllocRefs(UInt32 num
, ISzAlloc
*alloc
)
173 size_t sizeInBytes
= (size_t)num
* sizeof(CLzRef
);
174 if (sizeInBytes
/ sizeof(CLzRef
) != num
)
176 return (CLzRef
*)alloc
->Alloc(alloc
, sizeInBytes
);
179 int MatchFinder_Create(CMatchFinder
*p
, UInt32 historySize
,
180 UInt32 keepAddBufferBefore
, UInt32 matchMaxLen
, UInt32 keepAddBufferAfter
,
184 if (historySize
> kMaxHistorySize
)
186 MatchFinder_Free(p
, alloc
);
189 sizeReserv
= historySize
>> 1;
190 if (historySize
> ((UInt32
)2 << 30))
191 sizeReserv
= historySize
>> 2;
192 sizeReserv
+= (keepAddBufferBefore
+ matchMaxLen
+ keepAddBufferAfter
) / 2 + (1 << 19);
194 p
->keepSizeBefore
= historySize
+ keepAddBufferBefore
+ 1;
195 p
->keepSizeAfter
= matchMaxLen
+ keepAddBufferAfter
;
196 /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
197 if (LzInWindow_Create(p
, sizeReserv
, alloc
))
199 UInt32 newCyclicBufferSize
= (historySize
/* >> p->skipModeBits */) + 1;
201 p
->matchMaxLen
= matchMaxLen
;
203 p
->fixedHashSize
= 0;
204 if (p
->numHashBytes
== 2)
208 hs
= historySize
- 1;
214 /* hs >>= p->skipModeBits; */
215 hs
|= 0xFFFF; /* don't change it! It's required for Deflate */
218 if (p
->numHashBytes
== 3)
226 if (p
->numHashBytes
> 2) p
->fixedHashSize
+= kHash2Size
;
227 if (p
->numHashBytes
> 3) p
->fixedHashSize
+= kHash3Size
;
228 if (p
->numHashBytes
> 4) p
->fixedHashSize
+= kHash4Size
;
229 hs
+= p
->fixedHashSize
;
233 UInt32 prevSize
= p
->hashSizeSum
+ p
->numSons
;
235 p
->historySize
= historySize
;
237 p
->cyclicBufferSize
= newCyclicBufferSize
;
238 p
->numSons
= (p
->btMode
? newCyclicBufferSize
* 2 : newCyclicBufferSize
);
239 newSize
= p
->hashSizeSum
+ p
->numSons
;
240 if (p
->hash
!= 0 && prevSize
== newSize
)
242 MatchFinder_FreeThisClassMemory(p
, alloc
);
243 p
->hash
= AllocRefs(newSize
, alloc
);
246 p
->son
= p
->hash
+ p
->hashSizeSum
;
251 MatchFinder_Free(p
, alloc
);
255 static void MatchFinder_SetLimits(CMatchFinder
*p
)
257 UInt32 limit
= kMaxValForNormalize
- p
->pos
;
258 UInt32 limit2
= p
->cyclicBufferSize
- p
->cyclicBufferPos
;
261 limit2
= p
->streamPos
- p
->pos
;
262 if (limit2
<= p
->keepSizeAfter
)
268 limit2
-= p
->keepSizeAfter
;
272 UInt32 lenLimit
= p
->streamPos
- p
->pos
;
273 if (lenLimit
> p
->matchMaxLen
)
274 lenLimit
= p
->matchMaxLen
;
275 p
->lenLimit
= lenLimit
;
277 p
->posLimit
= p
->pos
+ limit
;
280 void MatchFinder_Init(CMatchFinder
*p
)
283 for (i
= 0; i
< p
->hashSizeSum
; i
++)
284 p
->hash
[i
] = kEmptyHashValue
;
285 p
->cyclicBufferPos
= 0;
286 p
->buffer
= p
->bufferBase
;
287 p
->pos
= p
->streamPos
= p
->cyclicBufferSize
;
289 p
->streamEndWasReached
= 0;
290 MatchFinder_ReadBlock(p
);
291 MatchFinder_SetLimits(p
);
294 static UInt32
MatchFinder_GetSubValue(CMatchFinder
*p
)
296 return (p
->pos
- p
->historySize
- 1) & kNormalizeMask
;
299 void MatchFinder_Normalize3(UInt32 subValue
, CLzRef
*items
, UInt32 numItems
)
302 for (i
= 0; i
< numItems
; i
++)
304 UInt32 value
= items
[i
];
305 if (value
<= subValue
)
306 value
= kEmptyHashValue
;
313 static void MatchFinder_Normalize(CMatchFinder
*p
)
315 UInt32 subValue
= MatchFinder_GetSubValue(p
);
316 MatchFinder_Normalize3(subValue
, p
->hash
, p
->hashSizeSum
+ p
->numSons
);
317 MatchFinder_ReduceOffsets(p
, subValue
);
320 static void MatchFinder_CheckLimits(CMatchFinder
*p
)
322 if (p
->pos
== kMaxValForNormalize
)
323 MatchFinder_Normalize(p
);
324 if (!p
->streamEndWasReached
&& p
->keepSizeAfter
== p
->streamPos
- p
->pos
)
325 MatchFinder_CheckAndMoveAndRead(p
);
326 if (p
->cyclicBufferPos
== p
->cyclicBufferSize
)
327 p
->cyclicBufferPos
= 0;
328 MatchFinder_SetLimits(p
);
331 static UInt32
* Hc_GetMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
332 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
333 UInt32
*distances
, UInt32 maxLen
)
335 son
[_cyclicBufferPos
] = curMatch
;
338 UInt32 delta
= pos
- curMatch
;
339 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
342 const Byte
*pb
= cur
- delta
;
343 curMatch
= son
[_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)];
344 if (pb
[maxLen
] == cur
[maxLen
] && *pb
== *cur
)
347 while (++len
!= lenLimit
)
348 if (pb
[len
] != cur
[len
])
352 *distances
++ = maxLen
= len
;
353 *distances
++ = delta
- 1;
362 UInt32
* GetMatchesSpec1(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
363 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
364 UInt32
*distances
, UInt32 maxLen
)
366 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
367 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
368 UInt32 len0
= 0, len1
= 0;
371 UInt32 delta
= pos
- curMatch
;
372 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
374 *ptr0
= *ptr1
= kEmptyHashValue
;
378 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
379 const Byte
*pb
= cur
- delta
;
380 UInt32 len
= (len0
< len1
? len0
: len1
);
381 if (pb
[len
] == cur
[len
])
383 if (++len
!= lenLimit
&& pb
[len
] == cur
[len
])
384 while (++len
!= lenLimit
)
385 if (pb
[len
] != cur
[len
])
389 *distances
++ = maxLen
= len
;
390 *distances
++ = delta
- 1;
399 if (pb
[len
] < cur
[len
])
417 static void SkipMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
418 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
)
420 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
421 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
422 UInt32 len0
= 0, len1
= 0;
425 UInt32 delta
= pos
- curMatch
;
426 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
428 *ptr0
= *ptr1
= kEmptyHashValue
;
432 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
433 const Byte
*pb
= cur
- delta
;
434 UInt32 len
= (len0
< len1
? len0
: len1
);
435 if (pb
[len
] == cur
[len
])
437 while (++len
!= lenLimit
)
438 if (pb
[len
] != cur
[len
])
449 if (pb
[len
] < cur
[len
])
468 ++p->cyclicBufferPos; \
470 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
472 #define MOVE_POS_RET MOVE_POS return offset;
474 static void MatchFinder_MovePos(CMatchFinder
*p
) { MOVE_POS
; }
476 #define GET_MATCHES_HEADER2(minLen, ret_op) \
477 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
478 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
481 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
482 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
484 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
486 #define GET_MATCHES_FOOTER(offset, maxLen) \
487 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
488 distances + offset, maxLen) - distances); MOVE_POS_RET;
490 #define SKIP_FOOTER \
491 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
493 static UInt32
Bt2_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
496 GET_MATCHES_HEADER(2)
498 curMatch
= p
->hash
[hashValue
];
499 p
->hash
[hashValue
] = p
->pos
;
501 GET_MATCHES_FOOTER(offset
, 1)
504 UInt32
Bt3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
507 GET_MATCHES_HEADER(3)
509 curMatch
= p
->hash
[hashValue
];
510 p
->hash
[hashValue
] = p
->pos
;
512 GET_MATCHES_FOOTER(offset
, 2)
515 static UInt32
Bt3_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
517 UInt32 hash2Value
, delta2
, maxLen
, offset
;
518 GET_MATCHES_HEADER(3)
522 delta2
= p
->pos
- p
->hash
[hash2Value
];
523 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
525 p
->hash
[hash2Value
] =
526 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
531 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
533 for (; maxLen
!= lenLimit
; maxLen
++)
534 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
536 distances
[0] = maxLen
;
537 distances
[1] = delta2
- 1;
539 if (maxLen
== lenLimit
)
541 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
545 GET_MATCHES_FOOTER(offset
, maxLen
)
548 static UInt32
Bt4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
550 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
551 GET_MATCHES_HEADER(4)
555 delta2
= p
->pos
- p
->hash
[ hash2Value
];
556 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
557 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
559 p
->hash
[ hash2Value
] =
560 p
->hash
[kFix3HashSize
+ hash3Value
] =
561 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
565 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
567 distances
[0] = maxLen
= 2;
568 distances
[1] = delta2
- 1;
571 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
574 distances
[offset
+ 1] = delta3
- 1;
580 for (; maxLen
!= lenLimit
; maxLen
++)
581 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
583 distances
[offset
- 2] = maxLen
;
584 if (maxLen
== lenLimit
)
586 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
592 GET_MATCHES_FOOTER(offset
, maxLen
)
595 static UInt32
Hc4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
597 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
598 GET_MATCHES_HEADER(4)
602 delta2
= p
->pos
- p
->hash
[ hash2Value
];
603 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
604 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
606 p
->hash
[ hash2Value
] =
607 p
->hash
[kFix3HashSize
+ hash3Value
] =
608 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
612 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
614 distances
[0] = maxLen
= 2;
615 distances
[1] = delta2
- 1;
618 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
621 distances
[offset
+ 1] = delta3
- 1;
627 for (; maxLen
!= lenLimit
; maxLen
++)
628 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
630 distances
[offset
- 2] = maxLen
;
631 if (maxLen
== lenLimit
)
633 p
->son
[p
->cyclicBufferPos
] = curMatch
;
639 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
640 distances
+ offset
, maxLen
) - (distances
));
644 UInt32
Hc3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
647 GET_MATCHES_HEADER(3)
649 curMatch
= p
->hash
[hashValue
];
650 p
->hash
[hashValue
] = p
->pos
;
651 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
652 distances
, 2) - (distances
));
656 static void Bt2_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
662 curMatch
= p
->hash
[hashValue
];
663 p
->hash
[hashValue
] = p
->pos
;
669 void Bt3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
675 curMatch
= p
->hash
[hashValue
];
676 p
->hash
[hashValue
] = p
->pos
;
682 static void Bt3_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
689 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
690 p
->hash
[hash2Value
] =
691 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
697 static void Bt4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
701 UInt32 hash2Value
, hash3Value
;
704 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
705 p
->hash
[ hash2Value
] =
706 p
->hash
[kFix3HashSize
+ hash3Value
] = p
->pos
;
707 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
713 static void Hc4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
717 UInt32 hash2Value
, hash3Value
;
720 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
721 p
->hash
[ hash2Value
] =
722 p
->hash
[kFix3HashSize
+ hash3Value
] =
723 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
724 p
->son
[p
->cyclicBufferPos
] = curMatch
;
730 void Hc3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
736 curMatch
= p
->hash
[hashValue
];
737 p
->hash
[hashValue
] = p
->pos
;
738 p
->son
[p
->cyclicBufferPos
] = curMatch
;
744 void MatchFinder_CreateVTable(CMatchFinder
*p
, IMatchFinder
*vTable
)
746 vTable
->Init
= (Mf_Init_Func
)MatchFinder_Init
;
747 vTable
->GetIndexByte
= (Mf_GetIndexByte_Func
)MatchFinder_GetIndexByte
;
748 vTable
->GetNumAvailableBytes
= (Mf_GetNumAvailableBytes_Func
)MatchFinder_GetNumAvailableBytes
;
749 vTable
->GetPointerToCurrentPos
= (Mf_GetPointerToCurrentPos_Func
)MatchFinder_GetPointerToCurrentPos
;
752 vTable
->GetMatches
= (Mf_GetMatches_Func
)Hc4_MatchFinder_GetMatches
;
753 vTable
->Skip
= (Mf_Skip_Func
)Hc4_MatchFinder_Skip
;
755 else if (p
->numHashBytes
== 2)
757 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt2_MatchFinder_GetMatches
;
758 vTable
->Skip
= (Mf_Skip_Func
)Bt2_MatchFinder_Skip
;
760 else if (p
->numHashBytes
== 3)
762 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt3_MatchFinder_GetMatches
;
763 vTable
->Skip
= (Mf_Skip_Func
)Bt3_MatchFinder_Skip
;
767 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt4_MatchFinder_GetMatches
;
768 vTable
->Skip
= (Mf_Skip_Func
)Bt4_MatchFinder_Skip
;