From: Liming Gao Date: Wed, 29 Aug 2018 00:51:28 +0000 (+0800) Subject: BaseTools Lzma: Update LZMA SDK version to 18.05 X-Git-Tag: edk2-stable201903~891 X-Git-Url: https://git.proxmox.com/?p=mirror_edk2.git;a=commitdiff_plain;h=5ec5a236d1fe64b2c40075b6738b74bdf7e1acdc BaseTools Lzma: Update LZMA SDK version to 18.05 https://bugzilla.tianocore.org/show_bug.cgi?id=1006 New formal release in https://www.7-zip.org/sdk.html is 18.05. Contributed-under: TianoCore Contribution Agreement 1.1 Signed-off-by: Liming Gao Reviewed-by: Star Zeng --- diff --git a/BaseTools/Source/C/LzmaCompress/LZMA-SDK-README.txt b/BaseTools/Source/C/LzmaCompress/LZMA-SDK-README.txt index bc9e29fd9f..7cf967a774 100644 --- a/BaseTools/Source/C/LzmaCompress/LZMA-SDK-README.txt +++ b/BaseTools/Source/C/LzmaCompress/LZMA-SDK-README.txt @@ -1,3 +1,3 @@ -LzmaCompress is based on the LZMA SDK 16.04. LZMA SDK 16.04 -was placed in the public domain on 2016-10-04. It was +LzmaCompress is based on the LZMA SDK 18.05. LZMA SDK 18.05 +was placed in the public domain on 2018-04-30. It was released on the http://www.7-zip.org/sdk.html website. diff --git a/BaseTools/Source/C/LzmaCompress/LzmaCompress.c b/BaseTools/Source/C/LzmaCompress/LzmaCompress.c index ceb6a5bc77..da09b9f748 100644 --- a/BaseTools/Source/C/LzmaCompress/LzmaCompress.c +++ b/BaseTools/Source/C/LzmaCompress/LzmaCompress.c @@ -1,9 +1,9 @@ /** @file LZMA Compress/Decompress tool (LzmaCompress) - Based on LZMA SDK 16.04: + Based on LZMA SDK 18.05: LzmaUtil.c -- Test application for LZMA compression - 2016-10-04 : Igor Pavlov : Public domain + 2018-04-30 : Igor Pavlov : Public domain Copyright (c) 2006 - 2018, Intel Corporation. All rights reserved.
This program and the accompanying materials @@ -341,14 +341,14 @@ int main2(int numArgs, const char *args[], char *rs) if (!mQuietMode) { printf("Encoding\n"); } - res = Encode(&outStream.s, &inStream.s, fileSize); + res = Encode(&outStream.vt, &inStream.vt, fileSize); } else { if (!mQuietMode) { printf("Decoding\n"); } - res = Decode(&outStream.s, &inStream.s, fileSize); + res = Decode(&outStream.vt, &inStream.vt, fileSize); } File_Close(&outStream.file); diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.c index b2fed368a4..e486901e30 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.c @@ -1,5 +1,5 @@ /* 7zFile.c -- File IO -2009-11-24 : Igor Pavlov : Public domain */ +2017-04-03 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -213,7 +213,7 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) { #ifdef USE_WINDOWS_FILE - DWORD sizeHigh = 0; + DWORD sizeHigh; DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); if (sizeLow == 0xFFFFFFFF) { @@ -238,49 +238,49 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) /* ---------- FileSeqInStream ---------- */ -static SRes FileSeqInStream_Read(void *pp, void *buf, size_t *size) +static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size) { - CFileSeqInStream *p = (CFileSeqInStream *)pp; + CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt); return File_Read(&p->file, buf, size) == 0 ? 
SZ_OK : SZ_ERROR_READ; } void FileSeqInStream_CreateVTable(CFileSeqInStream *p) { - p->s.Read = FileSeqInStream_Read; + p->vt.Read = FileSeqInStream_Read; } /* ---------- FileInStream ---------- */ -static SRes FileInStream_Read(void *pp, void *buf, size_t *size) +static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size) { - CFileInStream *p = (CFileInStream *)pp; + CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; } -static SRes FileInStream_Seek(void *pp, Int64 *pos, ESzSeek origin) +static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin) { - CFileInStream *p = (CFileInStream *)pp; + CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); return File_Seek(&p->file, pos, origin); } void FileInStream_CreateVTable(CFileInStream *p) { - p->s.Read = FileInStream_Read; - p->s.Seek = FileInStream_Seek; + p->vt.Read = FileInStream_Read; + p->vt.Seek = FileInStream_Seek; } /* ---------- FileOutStream ---------- */ -static size_t FileOutStream_Write(void *pp, const void *data, size_t size) +static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) { - CFileOutStream *p = (CFileOutStream *)pp; + CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt); File_Write(&p->file, data, &size); return size; } void FileOutStream_CreateVTable(CFileOutStream *p) { - p->s.Write = FileOutStream_Write; + p->vt.Write = FileOutStream_Write; } diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.h index d62a192609..7e263bea1b 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zFile.h @@ -1,5 +1,5 @@ /* 7zFile.h -- File IO -2013-01-18 : Igor Pavlov : Public domain */ +2017-04-03 : Igor Pavlov : Public domain */ #ifndef __7Z_FILE_H #define __7Z_FILE_H @@ -54,7 +54,7 @@ WRes File_GetLength(CSzFile *p, UInt64 
*length); typedef struct { - ISeqInStream s; + ISeqInStream vt; CSzFile file; } CFileSeqInStream; @@ -63,7 +63,7 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p); typedef struct { - ISeekInStream s; + ISeekInStream vt; CSzFile file; } CFileInStream; @@ -72,7 +72,7 @@ void FileInStream_CreateVTable(CFileInStream *p); typedef struct { - ISeqOutStream s; + ISeqOutStream vt; CSzFile file; } CFileOutStream; diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zStream.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zStream.c index 5a92d532cc..579741fadc 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zStream.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zStream.c @@ -1,5 +1,5 @@ /* 7zStream.c -- 7z Stream functions -2013-11-12 : Igor Pavlov : Public domain */ +2017-04-03 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,12 +7,12 @@ #include "7zTypes.h" -SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType) +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType) { while (size != 0) { size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); + RINOK(ISeqInStream_Read(stream, buf, &processed)); if (processed == 0) return errorType; buf = (void *)((Byte *)buf + processed); @@ -21,40 +21,42 @@ SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorT return SZ_OK; } -SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size) +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size) { return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); } -SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf) +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf) { size_t processed = 1; - RINOK(stream->Read(stream, buf, &processed)); + RINOK(ISeqInStream_Read(stream, buf, &processed)); return (processed == 1) ? 
SZ_OK : SZ_ERROR_INPUT_EOF; } -SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset) + + +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) { Int64 t = offset; - return stream->Seek(stream, &t, SZ_SEEK_SET); + return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); } -SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size) +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size) { const void *lookBuf; if (*size == 0) return SZ_OK; - RINOK(stream->Look(stream, &lookBuf, size)); + RINOK(ILookInStream_Look(stream, &lookBuf, size)); memcpy(buf, lookBuf, *size); - return stream->Skip(stream, *size); + return ILookInStream_Skip(stream, *size); } -SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType) +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType) { while (size != 0) { size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); + RINOK(ILookInStream_Read(stream, buf, &processed)); if (processed == 0) return errorType; buf = (void *)((Byte *)buf + processed); @@ -63,61 +65,67 @@ SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes erro return SZ_OK; } -SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size) +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size) { return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); } -static SRes LookToRead_Look_Lookahead(void *pp, const void **buf, size_t *size) + + +#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt); + +static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size) { SRes res = SZ_OK; - CLookToRead *p = (CLookToRead *)pp; + GET_LookToRead2 size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) + if (size2 == 0 && *size != 0) { p->pos = 0; - size2 = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, 
&size2); + p->size = 0; + size2 = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, &size2); p->size = size2; } - if (size2 < *size) + if (*size > size2) *size = size2; *buf = p->buf + p->pos; return res; } -static SRes LookToRead_Look_Exact(void *pp, const void **buf, size_t *size) +static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size) { SRes res = SZ_OK; - CLookToRead *p = (CLookToRead *)pp; + GET_LookToRead2 size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) + if (size2 == 0 && *size != 0) { p->pos = 0; - if (*size > LookToRead_BUF_SIZE) - *size = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, size); + p->size = 0; + if (*size > p->bufSize) + *size = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, size); size2 = p->size = *size; } - if (size2 < *size) + if (*size > size2) *size = size2; *buf = p->buf + p->pos; return res; } -static SRes LookToRead_Skip(void *pp, size_t offset) +static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset) { - CLookToRead *p = (CLookToRead *)pp; + GET_LookToRead2 p->pos += offset; return SZ_OK; } -static SRes LookToRead_Read(void *pp, void *buf, size_t *size) +static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size) { - CLookToRead *p = (CLookToRead *)pp; + GET_LookToRead2 size_t rem = p->size - p->pos; if (rem == 0) - return p->realStream->Read(p->realStream, buf, size); + return ISeekInStream_Read(p->realStream, buf, size); if (rem > *size) rem = *size; memcpy(buf, p->buf + p->pos, rem); @@ -126,46 +134,43 @@ static SRes LookToRead_Read(void *pp, void *buf, size_t *size) return SZ_OK; } -static SRes LookToRead_Seek(void *pp, Int64 *pos, ESzSeek origin) +static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin) { - CLookToRead *p = (CLookToRead *)pp; + GET_LookToRead2 p->pos = p->size = 0; - return p->realStream->Seek(p->realStream, pos, origin); + return 
ISeekInStream_Seek(p->realStream, pos, origin); } -void LookToRead_CreateVTable(CLookToRead *p, int lookahead) +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) { - p->s.Look = lookahead ? - LookToRead_Look_Lookahead : - LookToRead_Look_Exact; - p->s.Skip = LookToRead_Skip; - p->s.Read = LookToRead_Read; - p->s.Seek = LookToRead_Seek; + p->vt.Look = lookahead ? + LookToRead2_Look_Lookahead : + LookToRead2_Look_Exact; + p->vt.Skip = LookToRead2_Skip; + p->vt.Read = LookToRead2_Read; + p->vt.Seek = LookToRead2_Seek; } -void LookToRead_Init(CLookToRead *p) -{ - p->pos = p->size = 0; -} -static SRes SecToLook_Read(void *pp, void *buf, size_t *size) + +static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size) { - CSecToLook *p = (CSecToLook *)pp; + CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt); return LookInStream_LookRead(p->realStream, buf, size); } void SecToLook_CreateVTable(CSecToLook *p) { - p->s.Read = SecToLook_Read; + p->vt.Read = SecToLook_Read; } -static SRes SecToRead_Read(void *pp, void *buf, size_t *size) +static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size) { - CSecToRead *p = (CSecToRead *)pp; - return p->realStream->Read(p->realStream, buf, size); + CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt); + return ILookInStream_Read(p->realStream, buf, size); } void SecToRead_CreateVTable(CSecToRead *p) { - p->s.Read = SecToRead_Read; + p->vt.Read = SecToRead_Read; } diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zTypes.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zTypes.h index 903047b10f..4977cdaa66 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zTypes.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zTypes.h @@ -1,5 +1,5 @@ /* 7zTypes.h -- Basic types -2013-11-12 : Igor Pavlov : Public domain */ +2017-07-17 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H @@ -42,13 +42,23 @@ EXTERN_C_BEGIN typedef int SRes; + #ifdef _WIN32 + /* typedef DWORD WRes; */ 
typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + #else + typedef int WRes; +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_WIN32 +#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) + #endif + #ifndef RINOK #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #endif @@ -112,48 +122,72 @@ typedef int Bool; #define MY_NO_INLINE #endif +#define MY_FORCE_INLINE __forceinline + #define MY_CDECL __cdecl #define MY_FAST_CALL __fastcall #else #define MY_NO_INLINE +#define MY_FORCE_INLINE #define MY_CDECL #define MY_FAST_CALL +/* inline keyword : for C++ / C99 */ + +/* GCC, clang: */ +/* +#if defined (__GNUC__) && (__GNUC__ >= 4) +#define MY_FORCE_INLINE __attribute__((always_inline)) +#define MY_NO_INLINE __attribute__((noinline)) +#endif +*/ + #endif /* The following interfaces use first parameter as pointer to structure */ -typedef struct +typedef struct IByteIn IByteIn; +struct IByteIn { - Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */ -} IByteIn; + Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) -typedef struct + +typedef struct IByteOut IByteOut; +struct IByteOut { - void (*Write)(void *p, Byte b); -} IByteOut; + void (*Write)(const IByteOut *p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) -typedef struct + +typedef struct ISeqInStream ISeqInStream; +struct ISeqInStream { - SRes (*Read)(void *p, void *buf, size_t *size); + SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
(output(*size) < input(*size)) is allowed */ -} ISeqInStream; +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) /* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf); +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); -typedef struct + +typedef struct ISeqOutStream ISeqOutStream; +struct ISeqOutStream { - size_t (*Write)(void *p, const void *buf, size_t size); + size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); /* Returns: result - the number of actually written bytes. (result < size) means error */ -} ISeqOutStream; +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) typedef enum { @@ -162,78 +196,162 @@ typedef enum SZ_SEEK_END = 2 } ESzSeek; -typedef struct + +typedef struct ISeekInStream ISeekInStream; +struct ISeekInStream { - SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); -} ISeekInStream; + SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -typedef struct + +typedef struct ILookInStream ILookInStream; +struct ILookInStream { - SRes (*Look)(void *p, const void **buf, size_t *size); + SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
(output(*size) > input(*size)) is not allowed (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(void *p, size_t offset); + SRes (*Skip)(const ILookInStream *p, size_t offset); /* offset must be <= output(*size) of Look */ - SRes (*Read)(void *p, void *buf, size_t *size); + SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); -} ILookInStream; + SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); +}; -SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); /* reads via ILookInStream::Read */ -SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); + -#define LookToRead_BUF_SIZE (1 << 14) typedef struct { - ILookInStream s; - ISeekInStream *realStream; + ILookInStream vt; + const ISeekInStream *realStream; + size_t pos; - size_t size; - Byte buf[LookToRead_BUF_SIZE]; -} CLookToRead; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } 
-void LookToRead_CreateVTable(CLookToRead *p, int lookahead); -void LookToRead_Init(CLookToRead *p); typedef struct { - ISeqInStream s; - ILookInStream *realStream; + ISeqInStream vt; + const ILookInStream *realStream; } CSecToLook; void SecToLook_CreateVTable(CSecToLook *p); + + typedef struct { - ISeqInStream s; - ILookInStream *realStream; + ISeqInStream vt; + const ILookInStream *realStream; } CSecToRead; void SecToRead_CreateVTable(CSecToRead *p); -typedef struct + +typedef struct ICompressProgress ICompressProgress; + +struct ICompressProgress { - SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); + SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); /* Returns: result. (result != SZ_OK) means break. Value (UInt64)(Int64)-1 for size means unknown value. */ -} ICompressProgress; +}; +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) -typedef struct + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc { - void *(*Alloc)(void *p, size_t size); - void (*Free)(void *p, void *address); /* address can be 0 */ -} ISzAlloc; + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef MY_container_of + +/* +#define MY_container_of(ptr, type, m) container_of(ptr, type, m) +#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) 
+#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) + + +#endif + +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) + +/* +#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +*/ +#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) + +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) +*/ + -#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) -#define IAlloc_Free(p, a) (p)->Free((p), a) #ifdef _WIN32 diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zVersion.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zVersion.h index acb67a94e5..ed3aa94270 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/7zVersion.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/7zVersion.h @@ -1,14 +1,21 @@ -#define MY_VER_MAJOR 16 -#define MY_VER_MINOR 04 +#define MY_VER_MAJOR 18 +#define MY_VER_MINOR 05 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "16.04" -#define MY_VERSION "16.04" -#define MY_DATE "2016-10-04" +#define MY_VERSION_NUMBERS "18.05" +#define MY_VERSION MY_VERSION_NUMBERS + +#ifdef MY_CPU_NAME + #define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")" +#else + #define MY_VERSION_CPU MY_VERSION +#endif + +#define MY_DATE "2018-04-30" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2016 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define 
MY_COPYRIGHT MY_COPYRIGHT_CR @@ -16,4 +23,5 @@ #define MY_COPYRIGHT MY_COPYRIGHT_PD #endif -#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " : " MY_COPYRIGHT " : " MY_DATE +#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE +#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.c index 9f1d036afe..30b499e5ff 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.c @@ -1,8 +1,10 @@ /* Alloc.c -- Memory allocation functions -2015-02-21 : Igor Pavlov : Public domain */ +2018-04-27 : Igor Pavlov : Public domain */ #include "Precomp.h" +#include <stdio.h> + #ifdef _WIN32 #include <windows.h> #endif @@ -14,20 +16,127 @@ /* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ #ifdef _SZ_ALLOC_DEBUG + #include <stdio.h> int g_allocCount = 0; int g_allocCountMid = 0; int g_allocCountBig = 0; + + +#define CONVERT_INT_TO_STR(charType, tempSize) \ + unsigned char temp[tempSize]; unsigned i = 0; \ + while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ + *s++ = (charType)('0' + (unsigned)val); \ + while (i != 0) { i--; *s++ = temp[i]; } \ + *s = 0; + +static void ConvertUInt64ToString(UInt64 val, char *s) +{ + CONVERT_INT_TO_STR(char, 24); +} + +#define GET_HEX_CHAR(t) ((char)(((t < 10) ? 
('0' + t) : ('A' + (t - 10))))) + +static void ConvertUInt64ToHex(UInt64 val, char *s) +{ + UInt64 v = val; + unsigned i; + for (i = 1;; i++) + { + v >>= 4; + if (v == 0) + break; + } + s[i] = 0; + do + { + unsigned t = (unsigned)(val & 0xF); + val >>= 4; + s[--i] = GET_HEX_CHAR(t); + } + while (i); +} + +#define DEBUG_OUT_STREAM stderr + +static void Print(const char *s) +{ + fputs(s, DEBUG_OUT_STREAM); +} + +static void PrintAligned(const char *s, size_t align) +{ + size_t len = strlen(s); + for(;;) + { + fputc(' ', DEBUG_OUT_STREAM); + if (len >= align) + break; + ++len; + } + Print(s); +} + +static void PrintLn() +{ + Print("\n"); +} + +static void PrintHex(UInt64 v, size_t align) +{ + char s[32]; + ConvertUInt64ToHex(v, s); + PrintAligned(s, align); +} + +static void PrintDec(UInt64 v, size_t align) +{ + char s[32]; + ConvertUInt64ToString(v, s); + PrintAligned(s, align); +} + +static void PrintAddr(void *p) +{ + PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12); +} + + +#define PRINT_ALLOC(name, cnt, size, ptr) \ + Print(name " "); \ + PrintDec(cnt++, 10); \ + PrintHex(size, 10); \ + PrintAddr(ptr); \ + PrintLn(); + +#define PRINT_FREE(name, cnt, ptr) if (ptr) { \ + Print(name " "); \ + PrintDec(--cnt, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#else + +#define PRINT_ALLOC(name, cnt, size, ptr) +#define PRINT_FREE(name, cnt, ptr) +#define Print(s) +#define PrintLn() +#define PrintHex(v, align) +#define PrintDec(v, align) +#define PrintAddr(p) + #endif + + void *MyAlloc(size_t size) { if (size == 0) - return 0; + return NULL; #ifdef _SZ_ALLOC_DEBUG { void *p = malloc(size); - fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); + PRINT_ALLOC("Alloc ", g_allocCount, size, p); return p; } #else @@ -37,10 +146,8 @@ void *MyAlloc(size_t size) void MyFree(void *address) { - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); - #endif + 
PRINT_FREE("Free ", g_allocCount, address); + free(address); } @@ -49,20 +156,18 @@ void MyFree(void *address) void *MidAlloc(size_t size) { if (size == 0) - return 0; - #ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); - #endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); + return NULL; + + PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); } void MidFree(void *address) { - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); - #endif - if (address == 0) + PRINT_FREE("Free-Mid", g_allocCountMid, address); + + if (!address) return; VirtualFree(address, 0, MEM_RELEASE); } @@ -79,10 +184,10 @@ typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); void SetLargePageSize() { #ifdef _7ZIP_LARGE_PAGES - SIZE_T size = 0; + SIZE_T size; GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (largePageMinimum == 0) + if (!largePageMinimum) return; size = largePageMinimum(); if (size == 0 || (size & (size - 1)) != 0) @@ -95,31 +200,36 @@ void SetLargePageSize() void *BigAlloc(size_t size) { if (size == 0) - return 0; - #ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); - #endif + return NULL; + + PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); #ifdef _7ZIP_LARGE_PAGES - if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) { - void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), - MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - if (res != 0) - return res; + SIZE_T ps = g_LargePageSize; + if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) + { + size_t size2; + ps--; + size2 = (size + ps) & ~ps; + if (size2 >= size) + { + void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | 
MEM_LARGE_PAGES, PAGE_READWRITE); + if (res) + return res; + } + } } #endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); } void BigFree(void *address) { - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); - #endif + PRINT_FREE("Free-Big", g_allocCountBig, address); - if (address == 0) + if (!address) return; VirtualFree(address, 0, MEM_RELEASE); } @@ -127,10 +237,219 @@ void BigFree(void *address) #endif -static void *SzAlloc(void *p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } -static void SzFree(void *p, void *address) { UNUSED_VAR(p); MyFree(address); } -ISzAlloc g_Alloc = { SzAlloc, SzFree }; +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } +const ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } +static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } +const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; + +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; + + +/* + uintptr_t : C99 (optional) + : unsupported in VS6 +*/ + +#ifdef _WIN32 + typedef UINT_PTR UIntPtr; +#else + /* + typedef uintptr_t UIntPtr; + */ + typedef ptrdiff_t UIntPtr; +#endif + + +#define ADJUST_ALLOC_SIZE 0 +/* +#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) +*/ +/* + Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if + MyAlloc() can return address that is NOT multiple of sizeof(void *). 
+*/ + + +/* +#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) +*/ +#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) + +#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) + + +#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32) + #define USE_posix_memalign +#endif + +/* + This posix_memalign() is for test purposes only. + We also need special Free() function instead of free(), + if this posix_memalign() is used. +*/ + +/* +static int posix_memalign(void **ptr, size_t align, size_t size) +{ + size_t newSize = size + align; + void *p; + void *pAligned; + *ptr = NULL; + if (newSize < size) + return 12; // ENOMEM + p = MyAlloc(newSize); + if (!p) + return 12; // ENOMEM + pAligned = MY_ALIGN_PTR_UP_PLUS(p, align); + ((void **)pAligned)[-1] = p; + *ptr = pAligned; + return 0; +} +*/ + +/* + ALLOC_ALIGN_SIZE >= sizeof(void *) + ALLOC_ALIGN_SIZE >= cache_line_size +*/ + +#define ALLOC_ALIGN_SIZE ((size_t)1 << 7) + +static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +{ + #ifndef USE_posix_memalign + + void *p; + void *pAligned; + size_t newSize; + UNUSED_VAR(pp); + + /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned + block to prevent cache line sharing with another allocated blocks */ + + newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE; + if (newSize < size) + return NULL; + + p = MyAlloc(newSize); + + if (!p) + return NULL; + pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE); + + Print(" size="); PrintHex(size, 8); + Print(" a_size="); PrintHex(newSize, 8); + Print(" ptr="); PrintAddr(p); + Print(" a_ptr="); PrintAddr(pAligned); + PrintLn(); + + ((void **)pAligned)[-1] = p; + + return pAligned; + + #else + + void *p; + UNUSED_VAR(pp); + if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) + return NULL; + + Print(" posix_memalign="); PrintAddr(p); + PrintLn(); + + return p; + + 
#endif +} + + +static void SzAlignedFree(ISzAllocPtr pp, void *address) +{ + UNUSED_VAR(pp); + #ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); + #else + free(address); + #endif +} + + +const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; + + + +#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) + +/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ +#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] +/* +#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1] +*/ + +static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) +{ + CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + void *adr; + void *pAligned; + size_t newSize; + size_t extra; + size_t alignSize = (size_t)1 << p->numAlignBits; + + if (alignSize < sizeof(void *)) + alignSize = sizeof(void *); + + if (p->offset >= alignSize) + return NULL; + + /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned + block to prevent cache line sharing with another allocated blocks */ + extra = p->offset & (sizeof(void *) - 1); + newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE; + if (newSize < size) + return NULL; -static void *SzBigAlloc(void *p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } -static void SzBigFree(void *p, void *address) { UNUSED_VAR(p); BigFree(address); } -ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; + adr = ISzAlloc_Alloc(p->baseAlloc, newSize); + + if (!adr) + return NULL; + + pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + + alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; + + PrintLn(); + Print("- Aligned: "); + Print(" size="); PrintHex(size, 8); + Print(" a_size="); PrintHex(newSize, 8); + Print(" ptr="); PrintAddr(adr); + Print(" a_ptr="); PrintAddr(pAligned); + PrintLn(); + + REAL_BLOCK_PTR_VAR(pAligned) = adr; + + return pAligned; +} + + +static void 
AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) +{ + if (address) + { + CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + PrintLn(); + Print("- Aligned Free: "); + PrintLn(); + ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); + } +} + + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p) +{ + p->vt.Alloc = AlignOffsetAlloc_Alloc; + p->vt.Free = AlignOffsetAlloc_Free; +} diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.h index 73b282a071..3d796e5eee 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/Alloc.h @@ -1,5 +1,5 @@ /* Alloc.h -- Memory allocation functions -2015-02-21 : Igor Pavlov : Public domain */ +2018-02-19 : Igor Pavlov : Public domain */ #ifndef __COMMON_ALLOC_H #define __COMMON_ALLOC_H @@ -29,8 +29,22 @@ void BigFree(void *address); #endif -extern ISzAlloc g_Alloc; -extern ISzAlloc g_BigAlloc; +extern const ISzAlloc g_Alloc; +extern const ISzAlloc g_BigAlloc; +extern const ISzAlloc g_MidAlloc; +extern const ISzAlloc g_AlignedAlloc; + + +typedef struct +{ + ISzAlloc vt; + ISzAllocPtr baseAlloc; + unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */ + size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */ +} CAlignOffsetAlloc; + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p); + EXTERN_C_END diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/Bra86.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/Bra86.c index 8dd3ed48d9..a6463c63ba 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/Bra86.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/Bra86.c @@ -1,5 +1,5 @@ /* Bra86.c -- Converter for x86 code (BCJ) -2013-11-12 : Igor Pavlov : Public domain */ +2017-04-03 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -37,7 +37,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding else { mask >>= (unsigned)d; - if (mask != 0 && 
(mask > 4 || mask == 3 || Test86MSByte(p[(mask >> 1) + 1]))) + if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1]))) { mask = (mask >> 1) | 4; pos++; diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/Compiler.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/Compiler.h index de8fab3749..c788648cd2 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/Compiler.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/Compiler.h @@ -1,5 +1,5 @@ /* Compiler.h -2015-08-02 : Igor Pavlov : Public domain */ +2017-04-03 : Igor Pavlov : Public domain */ #ifndef __7Z_COMPILER_H #define __7Z_COMPILER_H @@ -21,6 +21,7 @@ #pragma warning(disable : 4514) // unreferenced inline function has been removed #pragma warning(disable : 4702) // unreachable code #pragma warning(disable : 4710) // not inlined + #pragma warning(disable : 4714) // function marked as __forceinline not inlined #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information #endif diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/CpuArch.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/CpuArch.h index ef6083c3b8..7fb27282c7 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/CpuArch.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2016-06-09: Igor Pavlov : Public domain */ +2017-09-04 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H @@ -16,48 +16,122 @@ If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of pl MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. 
*/ -#if defined(_M_X64) \ - || defined(_M_AMD64) \ - || defined(__x86_64__) \ - || defined(__AMD64__) \ - || defined(__amd64__) +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #else + #define MY_CPU_NAME "x64" + #endif + #define MY_CPU_64BIT #endif -#if defined(MY_CPU_AMD64) \ - || defined(_M_IA64) \ - || defined(__AARCH64EL__) \ - || defined(__AARCH64EB__) + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + #define MY_CPU_32BIT +#endif + + +#if defined(_M_ARM64) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 + #define MY_CPU_NAME "arm64" #define MY_CPU_64BIT #endif -#if defined(_M_IX86) || defined(__i386__) -#define MY_CPU_X86 + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + #define MY_CPU_NAME "arm" + #define MY_CPU_32BIT #endif -#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) -#define MY_CPU_X86_OR_AMD64 + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT #endif -#if defined(MY_CPU_X86) \ - || defined(_M_ARM) \ - || defined(__ARMEL__) \ - || defined(__THUMBEL__) \ - || defined(__ARMEB__) \ - || defined(__THUMBEB__) + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #else + #define MY_CPU_NAME "ppc64" + #endif + 
#define MY_CPU_64BIT +#elif defined(__ppc__) \ + || defined(__powerpc__) + #define MY_CPU_NAME "ppc" #define MY_CPU_32BIT #endif -#if defined(_WIN32) && defined(_M_ARM) -#define MY_CPU_ARM_LE + +#if defined(__sparc64__) + #define MY_CPU_NAME "sparc64" + #define MY_CPU_64BIT +#elif defined(__sparc__) + #define MY_CPU_NAME "sparc" + /* #define MY_CPU_32BIT */ #endif -#if defined(_WIN32) && defined(_M_IA64) -#define MY_CPU_IA64_LE + +#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) +#define MY_CPU_X86_OR_AMD64 #endif + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + +#endif + + #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_IA64_LE) \ || defined(__LITTLE_ENDIAN__) \ || defined(__ARMEL__) \ @@ -86,14 +160,37 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY_CPU_BE #endif + #if defined(MY_CPU_LE) && defined(MY_CPU_BE) -Stop_Compiling_Bad_Endian + #error Stop_Compiling_Bad_Endian +#endif + + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT #endif +#ifndef MY_CPU_NAME + #ifdef MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - /* || defined(__AARCH64EL__) */ + || defined(MY_CPU_ARM64) \ + || defined(__ARM_FEATURE_UNALIGNED) #define MY_CPU_LE_UNALIGN #endif #endif @@ -139,6 +236,11 @@ Stop_Compiling_Bad_Endian #endif +#ifdef __has_builtin + #define MY__has_builtin(x) __has_builtin(x) +#else + #define MY__has_builtin(x) 0 +#endif #if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300) @@ -146,15 +248,21 @@ Stop_Compiling_Bad_Endian #include +#pragma intrinsic(_byteswap_ushort) #pragma 
intrinsic(_byteswap_ulong) #pragma intrinsic(_byteswap_uint64) + +/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ #define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) #define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) -#elif defined(MY_CPU_LE_UNALIGN) && defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +#elif defined(MY_CPU_LE_UNALIGN) && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */ #define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p)) #define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p)) @@ -179,10 +287,14 @@ Stop_Compiling_Bad_Endian #endif +#ifndef GetBe16 + #define GetBe16(p) ( (UInt16) ( \ ((UInt16)((const Byte *)(p))[0] << 8) | \ ((const Byte *)(p))[1] )) +#endif + #ifdef MY_CPU_X86_OR_AMD64 diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.c index c335d363ce..6ea82a9b53 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2015-10-15 : Igor Pavlov : Public domain */ +2017-06-10 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -16,18 +16,18 @@ #define kStartMaxLen 3 -static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc) +static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { if (!p->directInput) { - alloc->Free(alloc, p->bufferBase); + ISzAlloc_Free(alloc, p->bufferBase); p->bufferBase = NULL; } } /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ -static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc) +static int 
LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) { UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; if (p->directInput) @@ -39,7 +39,7 @@ static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *a { LzInWindow_Free(p, alloc); p->blockSize = blockSize; - p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize); + p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); } return (p->bufferBase != NULL); } @@ -81,7 +81,7 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) if (size == 0) return; - p->result = p->stream->Read(p->stream, dest, &size); + p->result = ISeqInStream_Read(p->stream, dest, &size); if (p->result != SZ_OK) return; if (size == 0) @@ -142,6 +142,7 @@ void MatchFinder_Construct(CMatchFinder *p) p->bufferBase = NULL; p->directInput = 0; p->hash = NULL; + p->expectedDataSize = (UInt64)(Int64)-1; MatchFinder_SetDefaultSettings(p); for (i = 0; i < 256; i++) @@ -149,34 +150,34 @@ void MatchFinder_Construct(CMatchFinder *p) UInt32 r = i; unsigned j; for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); p->crc[i] = r; } } -static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc) +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->hash); + ISzAlloc_Free(alloc, p->hash); p->hash = NULL; } -void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc) +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) { MatchFinder_FreeThisClassMemory(p, alloc); LzInWindow_Free(p, alloc); } -static CLzRef* AllocRefs(size_t num, ISzAlloc *alloc) +static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) { size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / sizeof(CLzRef) != num) return NULL; - return (CLzRef *)alloc->Alloc(alloc, sizeInBytes); + return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); } int 
MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc *alloc) + ISzAllocPtr alloc) { UInt32 sizeReserv; @@ -208,7 +209,11 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs = (1 << 16) - 1; else { - hs = historySize - 1; + hs = historySize; + if (hs > p->expectedDataSize) + hs = (UInt32)p->expectedDataSize; + if (hs != 0) + hs--; hs |= (hs >> 1); hs |= (hs >> 2); hs |= (hs >> 4); @@ -292,17 +297,33 @@ static void MatchFinder_SetLimits(CMatchFinder *p) p->posLimit = p->pos + limit; } -void MatchFinder_Init_2(CMatchFinder *p, int readData) + +void MatchFinder_Init_LowHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash; + size_t numItems = p->fixedHashSize; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_HighHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash + p->fixedHashSize; + size_t numItems = (size_t)p->hashMask + 1; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_3(CMatchFinder *p, int readData) { - UInt32 i; - UInt32 *hash = p->hash; - UInt32 num = p->hashSizeSum; - for (i = 0; i < num; i++) - hash[i] = kEmptyHashValue; - p->cyclicBufferPos = 0; p->buffer = p->bufferBase; - p->pos = p->streamPos = p->cyclicBufferSize; + p->pos = + p->streamPos = p->cyclicBufferSize; p->result = SZ_OK; p->streamEndWasReached = 0; @@ -312,10 +333,14 @@ void MatchFinder_Init_2(CMatchFinder *p, int readData) MatchFinder_SetLimits(p); } + void MatchFinder_Init(CMatchFinder *p) { - MatchFinder_Init_2(p, True); + MatchFinder_Init_HighHash(p); + MatchFinder_Init_LowHash(p); + MatchFinder_Init_3(p, True); } + static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) { @@ -558,10 +583,10 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash[h2]; - curMatch = hash[kFix3HashSize + hv]; + curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = 
pos; - hash[kFix3HashSize + hv] = pos; + (hash + kFix3HashSize)[hv] = pos; maxLen = 2; offset = 0; @@ -594,13 +619,13 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) pos = p->pos; d2 = pos - hash[ h2]; - d3 = pos - hash[kFix3HashSize + h3]; + d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = hash[kFix4HashSize + hv]; + curMatch = (hash + kFix4HashSize)[hv]; hash[ h2] = pos; - hash[kFix3HashSize + h3] = pos; - hash[kFix4HashSize + hv] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; maxLen = 0; offset = 0; @@ -615,7 +640,7 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) { maxLen = 3; - distances[offset + 1] = d3 - 1; + distances[(size_t)offset + 1] = d3 - 1; offset += 2; d2 = d3; } @@ -623,7 +648,7 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (offset != 0) { UPDATE_maxLen - distances[offset - 2] = maxLen; + distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) { SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); @@ -650,15 +675,15 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) pos = p->pos; d2 = pos - hash[ h2]; - d3 = pos - hash[kFix3HashSize + h3]; - d4 = pos - hash[kFix4HashSize + h4]; + d3 = pos - (hash + kFix3HashSize)[h3]; + d4 = pos - (hash + kFix4HashSize)[h4]; - curMatch = hash[kFix5HashSize + hv]; + curMatch = (hash + kFix5HashSize)[hv]; hash[ h2] = pos; - hash[kFix3HashSize + h3] = pos; - hash[kFix4HashSize + h4] = pos; - hash[kFix5HashSize + hv] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; maxLen = 0; offset = 0; @@ -691,7 +716,7 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) && *(cur - d4 + 3) == *(cur + 3)) { maxLen = 4; - distances[offset + 1] = d4 - 1; + distances[(size_t)offset + 1] = d4 - 1; offset += 2; d2 = 
d4; } @@ -699,7 +724,7 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (offset != 0) { UPDATE_maxLen - distances[offset - 2] = maxLen; + distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) { SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); @@ -726,13 +751,13 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) pos = p->pos; d2 = pos - hash[ h2]; - d3 = pos - hash[kFix3HashSize + h3]; + d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = hash[kFix4HashSize + hv]; + curMatch = (hash + kFix4HashSize)[hv]; hash[ h2] = pos; - hash[kFix3HashSize + h3] = pos; - hash[kFix4HashSize + hv] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; maxLen = 0; offset = 0; @@ -747,7 +772,7 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) { maxLen = 3; - distances[offset + 1] = d3 - 1; + distances[(size_t)offset + 1] = d3 - 1; offset += 2; d2 = d3; } @@ -755,7 +780,7 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (offset != 0) { UPDATE_maxLen - distances[offset - 2] = maxLen; + distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; @@ -784,15 +809,15 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) pos = p->pos; d2 = pos - hash[ h2]; - d3 = pos - hash[kFix3HashSize + h3]; - d4 = pos - hash[kFix4HashSize + h4]; + d3 = pos - (hash + kFix3HashSize)[h3]; + d4 = pos - (hash + kFix4HashSize)[h4]; - curMatch = hash[kFix5HashSize + hv]; + curMatch = (hash + kFix5HashSize)[hv]; hash[ h2] = pos; - hash[kFix3HashSize + h3] = pos; - hash[kFix4HashSize + h4] = pos; - hash[kFix5HashSize + hv] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; maxLen = 0; offset = 0; @@ -825,7 +850,7 @@ static UInt32 
Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) && *(cur - d4 + 3) == *(cur + 3)) { maxLen = 4; - distances[offset + 1] = d4 - 1; + distances[(size_t)offset + 1] = d4 - 1; offset += 2; d2 = d4; } @@ -833,7 +858,7 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (offset != 0) { UPDATE_maxLen - distances[offset - 2] = maxLen; + distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; @@ -897,9 +922,9 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_HEADER(3) HASH3_CALC; hash = p->hash; - curMatch = hash[kFix3HashSize + hv]; + curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = - hash[kFix3HashSize + hv] = p->pos; + (hash + kFix3HashSize)[hv] = p->pos; SKIP_FOOTER } while (--num != 0); @@ -914,10 +939,10 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_HEADER(4) HASH4_CALC; hash = p->hash; - curMatch = hash[kFix4HashSize + hv]; + curMatch = (hash + kFix4HashSize)[hv]; hash[ h2] = - hash[kFix3HashSize + h3] = - hash[kFix4HashSize + hv] = p->pos; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; SKIP_FOOTER } while (--num != 0); @@ -933,11 +958,11 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_HEADER(5) HASH5_CALC; hash = p->hash; - curMatch = hash[kFix5HashSize + hv]; + curMatch = (hash + kFix5HashSize)[hv]; hash[ h2] = - hash[kFix3HashSize + h3] = - hash[kFix4HashSize + h4] = - hash[kFix5HashSize + hv] = p->pos; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; SKIP_FOOTER } while (--num != 0); @@ -953,10 +978,10 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_HEADER(4) HASH4_CALC; hash = p->hash; - curMatch = hash[kFix4HashSize + hv]; + curMatch = (hash + kFix4HashSize)[hv]; hash[ h2] = - hash[kFix3HashSize + h3] = - hash[kFix4HashSize + hv] = p->pos; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = 
p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } @@ -973,11 +998,11 @@ static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_HEADER(5) HASH5_CALC; hash = p->hash; - curMatch = p->hash[kFix5HashSize + hv]; + curMatch = (hash + kFix5HashSize)[hv]; hash[ h2] = - hash[kFix3HashSize + h3] = - hash[kFix4HashSize + h4] = - hash[kFix5HashSize + hv] = p->pos; + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.h index 2ff6673771..c77added7b 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFind.h @@ -1,5 +1,5 @@ /* LzFind.h -- Match finder for LZ algorithms -2015-10-15 : Igor Pavlov : Public domain */ +2017-06-10 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H @@ -47,6 +47,8 @@ typedef struct _CMatchFinder SRes result; UInt32 crc[256]; size_t numRefs; + + UInt64 expectedDataSize; } CMatchFinder; #define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) @@ -71,8 +73,8 @@ void MatchFinder_Construct(CMatchFinder *p); */ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc *alloc); -void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc); + ISzAllocPtr alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); @@ -103,7 +105,9 @@ typedef struct _IMatchFinder void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); -void MatchFinder_Init_2(CMatchFinder *p, int readData); +void MatchFinder_Init_LowHash(CMatchFinder *p); +void MatchFinder_Init_HighHash(CMatchFinder *p); +void MatchFinder_Init_3(CMatchFinder *p, int
readData); void MatchFinder_Init(CMatchFinder *p); UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.c index cb61e0953a..2563824fcd 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2015-10-15 : Igor Pavlov : Public domain */ +2017-06-10 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -33,6 +33,8 @@ static void MtSync_GetNextBlock(CMtSync *p) Event_Set(&p->canStart); Event_Wait(&p->wasStarted); + + // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder); } else { @@ -155,6 +157,9 @@ static void HashThreadFunc(CMatchFinderMt *mt) UInt32 numProcessedBlocks = 0; Event_Wait(&p->canStart); Event_Set(&p->wasStarted); + + MatchFinder_Init_HighHash(mt->MatchFinder); + for (;;) { if (p->exit) @@ -205,7 +210,7 @@ static void HashThreadFunc(CMatchFinderMt *mt) if (num > kMtHashBlockSize - 2) num = kMtHashBlockSize - 2; mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); - heads[0] += num; + heads[0] = 2 + num; } mf->pos += num; mf->buffer += num; @@ -443,13 +448,13 @@ void MatchFinderMt_Construct(CMatchFinderMt *p) MtSync_Construct(&p->btSync); } -static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAlloc *alloc) +static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->hashBuf); + ISzAlloc_Free(alloc, p->hashBuf); p->hashBuf = NULL; } -void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc) +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) { MtSync_Destruct(&p->hashSync); MtSync_Destruct(&p->btSync); @@ -472,7 +477,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) } SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 
keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc) + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) { CMatchFinder *mf = p->MatchFinder; p->historySize = historySize; @@ -480,7 +485,7 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB return SZ_ERROR_PARAM; if (!p->hashBuf) { - p->hashBuf = (UInt32 *)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); + p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); if (!p->hashBuf) return SZ_ERROR_MEM; p->btBuf = p->hashBuf + kHashBufferSize; @@ -496,14 +501,18 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB } /* Call it after ReleaseStream / SetStream */ -void MatchFinderMt_Init(CMatchFinderMt *p) +static void MatchFinderMt_Init(CMatchFinderMt *p) { CMatchFinder *mf = p->MatchFinder; - p->btBufPos = p->btBufPosLimit = 0; - p->hashBufPos = p->hashBufPosLimit = 0; + + p->btBufPos = + p->btBufPosLimit = 0; + p->hashBufPos = + p->hashBufPosLimit = 0; /* Init without data reading. 
We don't want to read data in this thread */ - MatchFinder_Init_2(mf, False); + MatchFinder_Init_3(mf, False); + MatchFinder_Init_LowHash(mf); p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); p->btNumAvailBytes = 0; @@ -591,10 +600,10 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *dista MT_HASH3_CALC curMatch2 = hash[ h2]; - curMatch3 = hash[kFix3HashSize + h3]; + curMatch3 = (hash + kFix3HashSize)[h3]; hash[ h2] = lzPos; - hash[kFix3HashSize + h3] = lzPos; + (hash + kFix3HashSize)[h3] = lzPos; if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { @@ -627,12 +636,12 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distan MT_HASH4_CALC curMatch2 = hash[ h2]; - curMatch3 = hash[kFix3HashSize + h3]; - curMatch4 = hash[kFix4HashSize + h4]; + curMatch3 = (hash + kFix3HashSize)[h3]; + curMatch4 = (hash + kFix4HashSize)[h4]; hash[ h2] = lzPos; - hash[kFix3HashSize + h3] = lzPos; - hash[kFix4HashSize + h4] = lzPos; + (hash + kFix3HashSize)[h3] = lzPos; + (hash + kFix4HashSize)[h4] = lzPos; if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { @@ -684,8 +693,12 @@ static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) UInt32 i; for (i = 0; i < len; i += 2) { - *distances++ = *btBuf++; - *distances++ = *btBuf++; + UInt32 v0 = btBuf[0]; + UInt32 v1 = btBuf[1]; + btBuf += 2; + distances[0] = v0; + distances[1] = v1; + distances += 2; } } INCREASE_LZ_POS @@ -712,8 +725,12 @@ static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); do { - *distances2++ = *btBuf++; - *distances2++ = *btBuf++; + UInt32 v0 = btBuf[0]; + UInt32 v1 = btBuf[1]; + btBuf += 2; + distances2[0] = v0; + distances2[1] = v1; + distances2 += 2; } while ((len -= 2) != 0); len = (UInt32)(distances2 - (distances)); @@ -746,7 +763,7 @@ static void 
MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) SKIP_HEADER_MT(3) UInt32 h2, h3; MT_HASH3_CALC - hash[kFix3HashSize + h3] = + (hash + kFix3HashSize)[h3] = hash[ h2] = p->lzPos; SKIP_FOOTER_MT @@ -758,8 +775,8 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) SKIP_HEADER_MT(4) UInt32 h2, h3, h4; MT_HASH4_CALC - hash[kFix4HashSize + h4] = - hash[kFix3HashSize + h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix3HashSize)[h3] = hash[ h2] = p->lzPos; SKIP_FOOTER_MT @@ -777,7 +794,7 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) { case 2: p->GetHeadsFunc = GetHeads2; - p->MixMatchesFunc = (Mf_Mix_Matches)0; + p->MixMatchesFunc = (Mf_Mix_Matches)NULL; vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.h index 46b6924ad7..3d86c788f3 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2015-05-03 : Igor Pavlov : Public domain */ +2017-04-03 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_MT_H #define __LZ_FIND_MT_H @@ -90,9 +90,9 @@ typedef struct _CMatchFinderMt } CMatchFinderMt; void MatchFinderMt_Construct(CMatchFinderMt *p); -void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc); +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc); SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc); + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.c 
b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.c index 64f1164f3d..962b94bb63 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.c @@ -1,8 +1,9 @@ /* LzmaDec.c -- LZMA Decoder -2016-05-16 : Igor Pavlov : Public domain */ +2018-02-28 : Igor Pavlov : Public domain */ #include "Precomp.h" +/* #include "CpuArch.h" */ #include "LzmaDec.h" #include @@ -24,9 +25,16 @@ #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ { UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_1(p); i = (i + i) + 1; A1; } -#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) -#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i); A0; } else \ + { UPDATE_1(p + i); A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } @@ -46,12 +54,15 @@ i -= 0x40; } #endif -#define NORMAL_LITER_DEC GET_BIT(prob + symbol, symbol) +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) #define MATCHED_LITER_DEC \ - matchByte <<= 1; \ - bit = (matchByte & offs); \ - probLit = prob + offs + bit + symbol; \ - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + + #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } @@ -66,25 +77,28 @@ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK; i += m; m += m; } else \ + { UPDATE_1_CHECK; m 
+= m; i += m; } + + #define kNumPosBitsMax 4 #define kNumPosStatesMax (1 << kNumPosBitsMax) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) #define kNumLenProbs (LenHigh + kLenNumHighSymbols) +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) #define kNumStates 12 +#define kNumStates2 16 #define kNumLitStates 7 #define kStartPosModelIndex 4 @@ -98,54 +112,117 @@ #define kAlignTableSize (1 << kNumAlignBits) #define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +/* External ASM code needs same CLzmaProb array layout. So don't change it. 
*/ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) #define IsRepG0 (IsRep + kNumStates) #define IsRepG1 (IsRepG0 + kNumStates) #define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) - -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 0x300 +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign #endif -#define LzmaProps_GetNumProbs(p) (Literal + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) #define LZMA_DIC_MIN (1 << 12) -/* First 
LZMA-symbol is always decoded. -And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then function returns error code. + } + +Processing: + first LZMA symbol will be decoded in any case + All checks for limits are at the end of main loop, + It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. 
+ Out: + RangeCoder is normalized Result: SZ_OK - OK SZ_ERROR_DATA - Error p->remainLen: < kMatchSpecLenStart : normal remain = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker (unused now) - = kMatchSpecLenStart + 2 : State Init Marker (unused now) */ -static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - CLzmaProb *probs = p->probs; - unsigned state = p->state; +#ifdef _LZMA_DEC_OPT + +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); Byte *dic = p->dic; SizeT dicBufSize = p->dicBufSize; @@ -164,17 +241,16 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = processedPos & pbMask; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0(prob) { unsigned symbol; UPDATE_0(prob); prob = probs + Literal; if (processedPos != 0 || checkDicSize != 0) - prob += ((UInt32)LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + - (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? 
dicBufSize : dicPos) - 1]) & lpMask) << lc); processedPos++; if (state < kNumLitStates) @@ -240,13 +316,16 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte else { UPDATE_1(prob); + /* + // that case was checked before with kBadRepCode if (checkDicSize == 0 && processedPos == 0) return SZ_ERROR_DATA; + */ prob = probs + IsRepG0 + state; IF_BIT_0(prob) { UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0(prob) { UPDATE_0(prob); @@ -299,7 +378,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; offset = 0; lim = (1 << kLenNumLowBits); } @@ -310,15 +389,15 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; - lim = (1 << kLenNumMidBits); + lim = (1 << kLenNumLowBits); } else { UPDATE_1(probLen); probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; + offset = kLenNumLowSymbols * 2; lim = (1 << kLenNumHighBits); } } @@ -331,7 +410,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; len = 1; TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len); @@ -345,7 +424,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); len = 1; TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len); 
@@ -356,7 +435,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte UPDATE_1(probLen); probLen = prob + LenHigh; TREE_DECODE(probLen, (1 << kLenNumHighBits), len); - len += kLenNumLowSymbols + kLenNumMidSymbols; + len += kLenNumLowSymbols * 2; } } } @@ -376,16 +455,16 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte if (posSlot < kEndPosModelIndex) { distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; + prob = probs + SpecPos; { - UInt32 mask = 1; - unsigned i = 1; + UInt32 m = 1; + distance++; do { - GET_BIT2(prob + i, i, ; , distance |= mask); - mask <<= 1; + REV_BIT_VAR(prob, distance, m); } - while (--numDirectBits != 0); + while (--numDirectBits); + distance -= m; } } else @@ -412,19 +491,20 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte } */ } - while (--numDirectBits != 0); + while (--numDirectBits); prob = probs + Align; distance <<= kNumAlignBits; { unsigned i = 1; - GET_BIT2(prob + i, i, ; , distance |= 1); - GET_BIT2(prob + i, i, ; , distance |= 2); - GET_BIT2(prob + i, i, ; , distance |= 4); - GET_BIT2(prob + i, i, ; , distance |= 8); + REV_BIT_CONST(prob, i, 1); + REV_BIT_CONST(prob, i, 2); + REV_BIT_CONST(prob, i, 4); + REV_BIT_LAST (prob, i, 8); + distance |= i; } if (distance == (UInt32)0xFFFFFFFF) { - len += kMatchSpecLenStart; + len = kMatchSpecLenStart; state -= kNumStates; break; } @@ -435,20 +515,12 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte rep2 = rep1; rep1 = rep0; rep0 = distance + 1; - if (checkDicSize == 0) - { - if (distance >= processedPos) - { - p->dicPos = dicPos; - return SZ_ERROR_DATA; - } - } - else if (distance >= checkDicSize) + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? 
processedPos: checkDicSize)) { p->dicPos = dicPos; return SZ_ERROR_DATA; } - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; } len += kMatchMinLen; @@ -511,6 +583,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte return SZ_OK; } +#endif static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { @@ -519,7 +592,7 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) Byte *dic = p->dic; SizeT dicPos = p->dicPos; SizeT dicBufSize = p->dicBufSize; - unsigned len = p->remainLen; + unsigned len = (unsigned)p->remainLen; SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ SizeT rem = limit - dicPos; if (rem < len) @@ -540,6 +613,14 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) } } + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { do @@ -550,9 +631,13 @@ static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte UInt32 rem = p->prop.dicSize - p->processedPos; if (limit - p->dicPos > rem) limit2 = p->dicPos + rem; + + if (p->processedPos == 0) + if (p->code >= kBadRepCode) + return SZ_ERROR_DATA; } - - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); + + RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) p->checkDicSize = p->prop.dicSize; @@ -561,9 +646,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - if (p->remainLen > kMatchSpecLenStart) - p->remainLen = 
kMatchSpecLenStart; - return 0; } @@ -580,17 +662,17 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS UInt32 range = p->range; UInt32 code = p->code; const Byte *bufLimit = buf + inSize; - const CLzmaProb *probs = p->probs; - unsigned state = p->state; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; ELzmaDummy res; { const CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); + unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK @@ -618,10 +700,11 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned bit; const CLzmaProb *probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) } while (symbol < 0x100); } @@ -648,7 +731,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; @@ -691,7 +774,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; offset = 0; limit = 1 << kLenNumLowBits; } @@ -702,15 +785,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); + 
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; + limit = 1 << kLenNumLowBits; } else { UPDATE_1_CHECK; probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; + offset = kLenNumLowSymbols * 2; limit = 1 << kLenNumHighBits; } } @@ -722,7 +805,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned posSlot; prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); if (posSlot >= kStartPosModelIndex) @@ -733,7 +816,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS if (posSlot < kEndPosModelIndex) { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); } else { @@ -745,17 +828,18 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS code -= range & (((code - range) >> 31) - 1); /* if (code >= range) code -= range; */ } - while (--numDirectBits != 0); + while (--numDirectBits); prob = probs + Align; numDirectBits = kNumAlignBits; } { unsigned i = 1; + unsigned m = 1; do { - GET_BIT_CHECK(prob + i, i); + REV_BIT_CHECK(prob, i, m); } - while (--numDirectBits != 0); + while (--numDirectBits); } } } @@ -768,18 +852,17 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) { - p->needFlush = 1; - p->remainLen = 0; + p->remainLen = kMatchSpecLenStart + 1; p->tempBufSize = 0; if (initDic) { p->processedPos = 0; p->checkDicSize = 0; - p->needInitState = 1; + p->remainLen = kMatchSpecLenStart + 2; } if (initState) - p->needInitState = 1; + p->remainLen = kMatchSpecLenStart + 2; } void LzmaDec_Init(CLzmaDec *p) @@ 
-788,53 +871,54 @@ void LzmaDec_Init(CLzmaDec *p) LzmaDec_InitDicAndState(p, True, True); } -static void LzmaDec_InitStateReal(CLzmaDec *p) -{ - SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); - SizeT i; - CLzmaProb *probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); *status = LZMA_STATUS_NOT_SPECIFIED; - while (p->remainLen != kMatchSpecLenStart) + if (p->remainLen > kMatchSpecLenStart) { - int checkEndMarkNow; + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } - if (p->needFlush) - { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) - { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - p->code = - ((UInt32)p->tempBuf[1] << 24) - | ((UInt32)p->tempBuf[2] << 16) - | ((UInt32)p->tempBuf[3] << 8) - | ((UInt32)p->tempBuf[4]); - p->range = 0xFFFFFFFF; - 
p->needFlush = 0; - p->tempBufSize = 0; - } + p->remainLen = 0; + } + + LzmaDec_WriteRem(p, dicLimit); + + while (p->remainLen != kMatchSpecLenStart) + { + int checkEndMarkNow = 0; - checkEndMarkNow = 0; if (p->dicPos >= dicLimit) { if (p->remainLen == 0 && p->code == 0) @@ -855,9 +939,6 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr checkEndMarkNow = 1; } - if (p->needInitState) - LzmaDec_InitStateReal(p); - if (p->tempBufSize == 0) { SizeT processed; @@ -930,11 +1011,14 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr p->tempBufSize = 0; } } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; + + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; } + SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; @@ -975,19 +1059,19 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *sr } } -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->probs); + ISzAlloc_Free(alloc, p->probs); p->probs = NULL; } -static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->dic); + ISzAlloc_Free(alloc, p->dic); p->dic = NULL; } -void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) { LzmaDec_FreeProbs(p, alloc); LzmaDec_FreeDict(p, alloc); @@ -1011,29 +1095,30 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) if (d >= (9 * 5 * 5)) return SZ_ERROR_UNSUPPORTED; - p->lc = d % 9; + p->lc = (Byte)(d % 9); d /= 9; - p->pb = d / 5; - p->lp = d % 5; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); return SZ_OK; } -static SRes 
LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) { UInt32 numProbs = LzmaProps_GetNumProbs(propNew); if (!p->probs || numProbs != p->numProbs) { LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); if (!p->probs) return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; } return SZ_OK; } -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); @@ -1042,7 +1127,7 @@ SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, I return SZ_OK; } -SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; SizeT dicBufSize; @@ -1062,7 +1147,7 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll if (!p->dic || dicBufSize != p->dicBufSize) { LzmaDec_FreeDict(p, alloc); - p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); if (!p->dic) { LzmaDec_FreeProbs(p, alloc); @@ -1076,7 +1161,7 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAlloc *alloc) + ELzmaStatus *status, ISzAllocPtr alloc) { CLzmaDec p; SRes res; diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.h 
b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.h index 2633abeac9..28ce60c3ea 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaDec.h @@ -1,5 +1,5 @@ /* LzmaDec.h -- LZMA Decoder -2013-01-18 : Igor Pavlov : Public domain */ +2018-04-21 : Igor Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H @@ -12,11 +12,13 @@ EXTERN_C_BEGIN /* _LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ +typedef #ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; /* ---------- LZMA Properties ---------- */ @@ -25,7 +27,10 @@ EXTERN_C_BEGIN typedef struct _CLzmaProps { - unsigned lc, lp, pb; + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; UInt32 dicSize; } CLzmaProps; @@ -47,32 +52,34 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); typedef struct { + /* Don't change this structure. ASM code can use it. */ CLzmaProps prop; CLzmaProb *probs; + CLzmaProb *probs_1664; Byte *dic; - const Byte *buf; - UInt32 range, code; - SizeT dicPos; SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; UInt32 processedPos; UInt32 checkDicSize; - unsigned state; UInt32 reps[4]; - unsigned remainLen; - int needFlush; - int needInitState; + UInt32 state; + UInt32 remainLen; + UInt32 numProbs; unsigned tempBufSize; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } +#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } void LzmaDec_Init(CLzmaDec *p); /* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. 
You must know exact uncompressed size to decompress such stream. */ typedef enum { @@ -129,11 +136,11 @@ LzmaDec_Allocate* can return: SZ_ERROR_UNSUPPORTED - Unsupported properties */ -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc); -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); -SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); -void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); /* ---------- Dictionary Interface ---------- */ @@ -142,7 +149,7 @@ void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); You must work with CLzmaDec variables directly in this interface. STEPS: - LzmaDec_Constr() + LzmaDec_Construct() LzmaDec_Allocate() for (each new stream) { @@ -220,7 +227,7 @@ Returns: SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAlloc *alloc); + ELzmaStatus *status, ISzAllocPtr alloc); EXTERN_C_END diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.c index 462ca67565..bebe664d3e 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2016-05-16 : Igor Pavlov : Public domain */ +2018-04-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -23,17 +23,8 @@ static unsigned g_STAT_OFFSET = 0; #endif -#define kMaxHistorySize ((UInt32)3 << 29) -/* #define kMaxHistorySize ((UInt32)7 << 29) */ - -#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) - -#define kBlockSize (9 << 10) 
-#define kUnpackBlockSize (1 << 18) -#define kMatchArraySize (1 << 21) -#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) - -#define kNumMaxDirectBits (31) +#define kLzmaMaxHistorySize ((UInt32)3 << 29) +/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) @@ -62,14 +53,15 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (level < 0) level = 5; p->level = level; - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); + if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); if (p->dictSize > p->reduceSize) { unsigned i; + UInt32 reduceSize = (UInt32)p->reduceSize; for (i = 11; i <= 30; i++) { - if ((UInt32)p->reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } - if ((UInt32)p->reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } + if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } + if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } } } @@ -110,9 +102,9 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } -static UInt32 GetPosSlot1(UInt32 pos) +static unsigned GetPosSlot1(UInt32 pos) { - UInt32 res; + unsigned res; BSR2_RET(pos, res); return res; } @@ -145,18 +137,18 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos) /* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ /* -#define BSR2_RET(pos, res) { UInt32 zz = 6 + ((kNumLogBits - 1) & \ +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ res = p->g_FastPos[pos >> zz] + (zz * 2); } */ /* -#define BSR2_RET(pos, res) { UInt32 zz = 6 + ((kNumLogBits - 1) & \ +#define 
BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ res = p->g_FastPos[pos >> zz] + (zz * 2); } */ -#define BSR2_RET(pos, res) { UInt32 zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ +#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ res = p->g_FastPos[pos >> zz] + (zz * 2); } /* @@ -167,32 +159,32 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos) #define GetPosSlot1(pos) p->g_FastPos[pos] #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } #endif #define LZMA_NUM_REPS 4 -typedef unsigned CState; +typedef UInt16 CState; +typedef UInt16 CExtra; typedef struct { UInt32 price; - CState state; - int prev1IsChar; - int prev2; - - UInt32 posPrev2; - UInt32 backPrev2; - - UInt32 posPrev; - UInt32 backPrev; - UInt32 backs[LZMA_NUM_REPS]; + CExtra extra; + // 0 : normal + // 1 : LIT : MATCH + // > 1 : MATCH (extra-1) : LIT : REP0 (len) + UInt32 len; + UInt32 dist; + UInt32 reps[LZMA_NUM_REPS]; } COptimal; + #define kNumOpts (1 << 12) +#define kPackReserve (1 + kNumOpts * 2) #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 @@ -200,22 +192,21 @@ typedef struct #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) - #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kAlignMask (kAlignTableSize - 1) #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 -#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) - #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) +typedef #ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; #define LZMA_PB_MAX 4 
#define LZMA_LC_MAX 8 @@ -223,15 +214,11 @@ typedef struct #define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) #define LZMA_MATCH_LEN_MIN 2 #define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) @@ -241,27 +228,23 @@ typedef struct typedef struct { - CLzmaProb choice; - CLzmaProb choice2; - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; - CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; CLzmaProb high[kLenNumHighSymbols]; } CLenEnc; typedef struct { - CLenEnc p; - UInt32 tableSize; + unsigned tableSize; + unsigned counters[LZMA_NUM_PB_STATES_MAX]; UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - UInt32 counters[LZMA_NUM_PB_STATES_MAX]; } CLenPriceEnc; typedef struct { UInt32 range; - Byte cache; + unsigned cache; UInt64 low; UInt64 cacheSize; Byte *buf; @@ -277,48 +260,54 @@ typedef struct { CLzmaProb *litProbs; - UInt32 state; + unsigned state; UInt32 reps[LZMA_NUM_REPS]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; CLzmaProb isRep[kNumStates]; CLzmaProb isRepG0[kNumStates]; CLzmaProb isRepG1[kNumStates]; CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb posEncoders[kNumFullDistances]; - CLenPriceEnc lenEnc; - CLenPriceEnc 
repLenEnc; + CLenEnc lenProbs; + CLenEnc repLenProbs; + } CSaveState; +typedef UInt32 CProbPrice; + + typedef struct { void *matchFinderObj; IMatchFinder matchFinder; - UInt32 optimumEndIndex; - UInt32 optimumCurrentIndex; + unsigned optCur; + unsigned optEnd; - UInt32 longestMatchLength; - UInt32 numPairs; + unsigned longestMatchLen; + unsigned numPairs; UInt32 numAvail; - UInt32 numFastBytes; - UInt32 additionalOffset; + unsigned state; + unsigned numFastBytes; + unsigned additionalOffset; UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; + unsigned lpMask, pbMask; + CLzmaProb *litProbs; + CRangeEnc rc; + + UInt32 backRes; unsigned lc, lp, pb; - unsigned lpMask, pbMask; unsigned lclp; - CLzmaProb *litProbs; - Bool fastMode; Bool writeEndMark; Bool finished; @@ -327,19 +316,19 @@ typedef struct UInt64 nowPos64; - UInt32 matchPriceCount; - UInt32 alignPriceCount; + unsigned matchPriceCount; + unsigned alignPriceCount; - UInt32 distTableSize; + unsigned distTableSize; UInt32 dictSize; SRes result; - CRangeEnc rc; - #ifndef _7ZIP_ST Bool mtMode; + // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; + // end of CMatchFinderMt is used in BT and HASH threads #endif CMatchFinder matchFinderBase; @@ -348,33 +337,37 @@ typedef struct Byte pad[128]; #endif - COptimal opt[kNumOpts]; - - #ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; - #endif + // LZ thread + CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + UInt32 alignPrices[kAlignTableSize]; UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - UInt32 alignPrices[kAlignTableSize]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; CLzmaProb isRep[kNumStates]; CLzmaProb isRepG0[kNumStates]; CLzmaProb isRepG1[kNumStates]; CLzmaProb 
isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb posEncoders[kNumFullDistances]; + CLenEnc lenProbs; + CLenEnc repLenProbs; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif + CLenPriceEnc lenEnc; CLenPriceEnc repLenEnc; + COptimal opt[kNumOpts]; + CSaveState saveState; #ifndef _7ZIP_ST @@ -383,58 +376,62 @@ typedef struct } CLzmaEnc; + +#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); + void LzmaEnc_SaveState(CLzmaEncHandle pp) { CLzmaEnc *p = (CLzmaEnc *)pp; CSaveState *dest = &p->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, 
sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); } + void LzmaEnc_RestoreState(CLzmaEncHandle pp) { CLzmaEnc *dest = (CLzmaEnc *)pp; const CSaveState *p = &dest->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; + dest->state = p->state; - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); } + + SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) { CLzmaEnc *p = (CLzmaEnc *)pp; @@ -445,7 +442,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) - || props.dictSize > kMaxHistorySize) + || props.dictSize > kLzmaMaxHistorySize) return 
SZ_ERROR_PARAM; p->dictSize = props.dictSize; @@ -463,7 +460,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->fastMode = (props.algo == 0); p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); { - UInt32 numHashBytes = 4; + unsigned numHashBytes = 4; if (props.btMode) { if (props.numHashBytes < 2) @@ -492,13 +489,27 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) return SZ_OK; } -static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; -#define IsCharState(s) ((s) < 7) +void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->matchFinderBase.expectedDataSize = expectedDataSiize; +} + +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 + +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) #define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? 
(len) - 2 : kNumLenToPosStates - 1) #define kInfinityPrice (1 << 30) @@ -509,14 +520,16 @@ static void RangeEnc_Construct(CRangeEnc *p) p->bufBase = NULL; } -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) #define RC_BUF_SIZE (1 << 16) -static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc) + +static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) { if (!p->bufBase) { - p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE); + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); if (!p->bufBase) return 0; p->bufLim = p->bufBase + RC_BUF_SIZE; @@ -524,19 +537,19 @@ static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc) return 1; } -static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc) +static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->bufBase); + ISzAlloc_Free(alloc, p->bufBase); p->bufBase = 0; } static void RangeEnc_Init(CRangeEnc *p) { /* Stream.Init(); */ - p->low = 0; p->range = 0xFFFFFFFF; - p->cacheSize = 1; p->cache = 0; + p->low = 0; + p->cacheSize = 0; p->buf = p->bufBase; @@ -544,37 +557,48 @@ static void RangeEnc_Init(CRangeEnc *p) p->res = SZ_OK; } -static void RangeEnc_FlushStream(CRangeEnc *p) +MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { size_t num; if (p->res != SZ_OK) return; num = p->buf - p->bufBase; - if (num != p->outStream->Write(p->outStream, p->bufBase, num)) + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) p->res = SZ_ERROR_WRITE; p->processed += num; p->buf = p->bufBase; } -static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) { - if ((UInt32)p->low < (UInt32)0xFF000000 || (unsigned)(p->low >> 32) != 0) + UInt32 low = 
(UInt32)p->low; + unsigned high = (unsigned)(p->low >> 32); + p->low = (UInt32)(low << 8); + if (low < (UInt32)0xFF000000 || high != 0) { - Byte temp = p->cache; - do { Byte *buf = p->buf; - *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); + *buf++ = (Byte)(p->cache + high); + p->cache = (unsigned)(low >> 24); p->buf = buf; if (buf == p->bufLim) RangeEnc_FlushStream(p); - temp = 0xFF; + if (p->cacheSize == 0) + return; + } + high += 0xFF; + for (;;) + { + Byte *buf = p->buf; + *buf++ = (Byte)(high); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (--p->cacheSize == 0) + return; } - while (--p->cacheSize != 0); - p->cache = (Byte)((UInt32)p->low >> 24); } p->cacheSize++; - p->low = (UInt32)p->low << 8; } static void RangeEnc_FlushData(CRangeEnc *p) @@ -584,78 +608,121 @@ static void RangeEnc_FlushData(CRangeEnc *p) RangeEnc_ShiftLow(p); } -static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, unsigned numBits) -{ - do - { - p->range >>= 1; - p->low += p->range & (0 - ((value >> --numBits) & 1)); - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } - } - while (numBits != 0); -} +#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } -static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol) -{ - UInt32 ttt = *prob; - UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; - if (symbol == 0) - { - p->range = newBound; - ttt += (kBitModelTotal - ttt) >> kNumMoveBits; - } - else - { - p->low += newBound; - p->range -= newBound; - ttt -= ttt >> kNumMoveBits; +#define RC_BIT_PRE(p, prob) \ + ttt = *(prob); \ + newBound = (range >> kNumBitModelTotalBits) * ttt; + +// #define _LZMA_ENC_USE_BRANCH + +#ifdef _LZMA_ENC_USE_BRANCH + +#define RC_BIT(p, prob, symbol) { \ + RC_BIT_PRE(p, prob) \ + if (symbol == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ + else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ + *(prob) 
= (CLzmaProb)ttt; \ + RC_NORM(p) \ } - *prob = (CLzmaProb)ttt; - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); + +#else + +#define RC_BIT(p, prob, symbol) { \ + UInt32 mask; \ + RC_BIT_PRE(p, prob) \ + mask = 0 - (UInt32)symbol; \ + range &= mask; \ + mask &= newBound; \ + range -= mask; \ + (p)->low += mask; \ + mask = (UInt32)symbol - 1; \ + range += newBound & mask; \ + mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ + mask += ((1 << kNumMoveBits) - 1); \ + ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ } + +#endif + + + + +#define RC_BIT_0_BASE(p, prob) \ + range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + +#define RC_BIT_1_BASE(p, prob) \ + range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ + +#define RC_BIT_0(p, prob) \ + RC_BIT_0_BASE(p, prob) \ + RC_NORM(p) + +#define RC_BIT_1(p, prob) \ + RC_BIT_1_BASE(p, prob) \ + RC_NORM(p) + +static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) +{ + UInt32 range, ttt, newBound; + range = p->range; + RC_BIT_PRE(p, prob) + RC_BIT_0(p, prob) + p->range = range; } static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol) { + UInt32 range = p->range; symbol |= 0x100; do { - RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + UInt32 ttt, newBound; + // RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + CLzmaProb *prob = probs + (symbol >> 8); + UInt32 bit = (symbol >> 7) & 1; symbol <<= 1; + RC_BIT(p, prob, bit); } while (symbol < 0x10000); + p->range = range; } static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte) { + UInt32 range = p->range; UInt32 offs = 0x100; symbol |= 0x100; do { + UInt32 ttt, newBound; + CLzmaProb *prob; + UInt32 bit; matchByte <<= 1; - RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + // 
RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + prob = probs + (offs + (matchByte & offs) + (symbol >> 8)); + bit = (symbol >> 7) & 1; symbol <<= 1; offs &= ~(matchByte ^ symbol); + RC_BIT(p, prob, bit); } while (symbol < 0x10000); + p->range = range; } -static void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) + + +static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) { UInt32 i; - for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) + for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) { - const int kCyclesBits = kNumBitPriceShiftBits; - UInt32 w = i; - UInt32 bitCount = 0; - int j; + const unsigned kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); + unsigned bitCount = 0; + unsigned j; for (j = 0; j < kCyclesBits; j++) { w = w * w; @@ -666,37 +733,41 @@ static void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) bitCount++; } } - ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + // printf("\n%3d: %5d", i, ProbPrices[i]); } } #define GET_PRICE(prob, symbol) \ - p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICEa(prob, symbol) \ - ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + ProbPrices[((prob) ^ (unsigned)((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] -#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1a(prob) 
ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] +#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + -static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, const UInt32 *ProbPrices) +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, const CProbPrice *ProbPrices) { UInt32 price = 0; symbol |= 0x100; do { - price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); - symbol <<= 1; + unsigned bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[symbol], bit); } - while (symbol < 0x10000); + while (symbol >= 2); return price; } -static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, const UInt32 *ProbPrices) + +static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, const CProbPrice *ProbPrices) { UInt32 price = 0; UInt32 offs = 0x100; @@ -713,520 +784,525 @@ static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt } -static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) +static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, UInt32 symbol) { - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0;) - { - UInt32 bit; - i--; - bit = (symbol >> i) & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - } -} - -static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) -{ - UInt32 m = 1; - int i; - for (i = 0; i < numBitLevels; i++) - { - UInt32 bit = symbol & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - symbol >>= 1; - } -} - -static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, const UInt32 *ProbPrices) -{ - UInt32 price = 0; - symbol |= (1 << numBitLevels); - while (symbol != 1) - { - price += 
GET_PRICEa(probs[symbol >> 1], symbol & 1); - symbol >>= 1; - } - return price; -} - -static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, const UInt32 *ProbPrices) -{ - UInt32 price = 0; - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0; i--) + UInt32 range = rc->range; + unsigned m = 1; + do { - UInt32 bit = symbol & 1; + UInt32 ttt, newBound; + unsigned bit = symbol & 1; + // RangeEnc_EncodeBit(rc, probs + m, bit); symbol >>= 1; - price += GET_PRICEa(probs[m], bit); + RC_BIT(rc, probs + m, bit); m = (m << 1) | bit; } - return price; + while (--numBits); + rc->range = range; } + static void LenEnc_Init(CLenEnc *p) { unsigned i; - p->choice = p->choice2 = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) p->low[i] = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) - p->mid[i] = kProbInitValue; for (i = 0; i < kLenNumHighSymbols; i++) p->high[i] = kProbInitValue; } -static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState) +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned symbol, unsigned posState) { - if (symbol < kLenNumLowSymbols) + UInt32 range, ttt, newBound; + CLzmaProb *probs = p->low; + range = rc->range; + RC_BIT_PRE(rc, probs); + if (symbol >= kLenNumLowSymbols) { - RangeEnc_EncodeBit(rc, &p->choice, 0); - RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice, 1); - if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) + RC_BIT_1(rc, probs); + probs += kLenNumLowSymbols; + RC_BIT_PRE(rc, probs); + if (symbol >= kLenNumLowSymbols * 2) { - RangeEnc_EncodeBit(rc, &p->choice2, 0); - RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice2, 1); - RcTree_Encode(rc, 
p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); + RC_BIT_1(rc, probs); + rc->range = range; + // RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols * 2); + LitEnc_Encode(rc, p->high, symbol - kLenNumLowSymbols * 2); + return; } + symbol -= kLenNumLowSymbols; } -} -static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, const UInt32 *ProbPrices) -{ - UInt32 a0 = GET_PRICE_0a(p->choice); - UInt32 a1 = GET_PRICE_1a(p->choice); - UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); - UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); - UInt32 i = 0; - for (i = 0; i < kLenNumLowSymbols; i++) + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, symbol); { - if (i >= numSymbols) - return; - prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); + unsigned m; + unsigned bit; + RC_BIT_0(rc, probs); + probs += (posState << (1 + kLenNumLowBits)); + bit = (symbol >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; + bit = (symbol >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; + bit = symbol & 1; RC_BIT(rc, probs + m, bit); + rc->range = range; } - for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) - { - if (i >= numSymbols) - return; - prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); - } - for (; i < numSymbols; i++) - prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); } -static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, const UInt32 *ProbPrices) +static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) { - LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); - p->counters[posState] = p->tableSize; + unsigned i; + for (i = 0; i < 8; i += 2) + { + UInt32 price = 
startPrice; + UInt32 prob; + price += GET_PRICEa(probs[1 ], (i >> 2)); + price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); + prob = probs[4 + (i >> 1)]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + } } -static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, const UInt32 *ProbPrices) + +MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTable( + CLenPriceEnc *p, unsigned posState, + const CLenEnc *enc, + const CProbPrice *ProbPrices) { - UInt32 posState; - for (posState = 0; posState < numPosStates; posState++) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); + // int y; for (y = 0; y < 100; y++) { + UInt32 a; + unsigned i, numSymbols; + + UInt32 *prices = p->prices[posState]; + { + const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits)); + SetPrices_3(probs, GET_PRICEa_0(enc->low[0]), prices, ProbPrices); + a = GET_PRICEa_1(enc->low[0]); + SetPrices_3(probs + kLenNumLowSymbols, a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]), prices + kLenNumLowSymbols, ProbPrices); + a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + } + numSymbols = p->tableSize; + p->counters[posState] = numSymbols; + for (i = kLenNumLowSymbols * 2; i < numSymbols; i += 1) + { + prices[i] = a + + // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); + LitEnc_GetPrice(enc->high, i - kLenNumLowSymbols * 2, ProbPrices); + /* + unsigned sym = (i - kLenNumLowSymbols * 2) >> 1; + UInt32 price = a + RcTree_GetPrice(enc->high, kLenNumHighBits - 1, sym, ProbPrices); + UInt32 prob = enc->high[(1 << 7) + sym]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + */ + } + // } } -static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, const UInt32 *ProbPrices) +static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, unsigned numPosStates, + const CLenEnc *enc, + const CProbPrice *ProbPrices) { - 
LenEnc_Encode(&p->p, rc, symbol, posState); - if (updatePrice) - if (--p->counters[posState] == 0) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); + unsigned posState; + for (posState = 0; posState < numPosStates; posState++) + LenPriceEnc_UpdateTable(p, posState, enc, ProbPrices); } - - -static void MovePos(CLzmaEnc *p, UInt32 num) -{ +/* #ifdef SHOW_STAT g_STAT_OFFSET += num; printf("\n MovePos %u", num); #endif +*/ - if (num != 0) - { - p->additionalOffset += num; - p->matchFinder.Skip(p->matchFinderObj, num); - } -} +#define MOVE_POS(p, num) { \ + p->additionalOffset += (num); \ + p->matchFinder.Skip(p->matchFinderObj, (num)); } + -static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes) +static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { - UInt32 lenRes = 0, numPairs; + unsigned numPairs; + + p->additionalOffset++; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + *numPairsRes = numPairs; #ifdef SHOW_STAT printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); g_STAT_OFFSET++; { - UInt32 i; + unsigned i; for (i = 0; i < numPairs; i += 2) printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); } #endif - if (numPairs > 0) + if (numPairs == 0) + return 0; { - lenRes = p->matches[numPairs - 2]; - if (lenRes == p->numFastBytes) + unsigned len = p->matches[(size_t)numPairs - 2]; + if (len != p->numFastBytes) + return len; { UInt32 numAvail = p->numAvail; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; { - const Byte *pbyCur = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - const Byte *pby = pbyCur + lenRes; - ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[numPairs - 1]; - const Byte *pbyLim = pbyCur + numAvail; - for (; pby != pbyLim && *pby == pby[dif]; pby++); - lenRes = (UInt32)(pby - pbyCur); + const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + const Byte *p2 = 
p1 + len; + ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; + const Byte *lim = p1 + numAvail; + for (; p2 != lim && *p2 == p2[dif]; p2++); + return (unsigned)(p2 - p1); } } } - p->additionalOffset++; - *numDistancePairsRes = numPairs; - return lenRes; } +#define MARK_LIT ((UInt32)(Int32)-1) -#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False; -#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False; -#define IsShortRep(p) ((p)->backPrev == 0) +#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } +#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } +#define IsShortRep(p) ((p)->dist == 0) -static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState) -{ - return - GET_PRICE_0(p->isRepG0[state]) + - GET_PRICE_0(p->isRep0Long[state][posState]); -} -static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState) +#define GetPrice_ShortRep(p, state, posState) \ + ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) + +#define GetPrice_Rep_0(p, state, posState) ( \ + GET_PRICE_1(p->isMatch[state][posState]) \ + + GET_PRICE_1(p->isRep0Long[state][posState])) \ + + GET_PRICE_1(p->isRep[state]) \ + + GET_PRICE_0(p->isRepG0[state]) + + +static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) { UInt32 price; + UInt32 prob = p->isRepG0[state]; if (repIndex == 0) { - price = GET_PRICE_0(p->isRepG0[state]); + price = GET_PRICE_0(prob); price += GET_PRICE_1(p->isRep0Long[state][posState]); } else { - price = GET_PRICE_1(p->isRepG0[state]); + price = GET_PRICE_1(prob); + prob = p->isRepG1[state]; if (repIndex == 1) - price += GET_PRICE_0(p->isRepG1[state]); + price += GET_PRICE_0(prob); else { - price += GET_PRICE_1(p->isRepG1[state]); + price += GET_PRICE_1(prob); price += GET_PRICE(p->isRepG2[state], repIndex - 2); } } return price; } -static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, 
UInt32 len, UInt32 state, UInt32 posState) -{ - return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + - GetPureRepPrice(p, repIndex, state, posState); -} -static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur) +static unsigned Backward(CLzmaEnc *p, unsigned cur) { - UInt32 posMem = p->opt[cur].posPrev; - UInt32 backMem = p->opt[cur].backPrev; - p->optimumEndIndex = cur; - do + unsigned wr = cur + 1; + p->optEnd = wr; + + for (;;) { - if (p->opt[cur].prev1IsChar) + UInt32 dist = p->opt[cur].dist; + UInt32 len = p->opt[cur].len; + UInt32 extra = p->opt[cur].extra; + cur -= len; + + if (extra) { - MakeAsChar(&p->opt[posMem]) - p->opt[posMem].posPrev = posMem - 1; - if (p->opt[cur].prev2) + wr--; + p->opt[wr].len = len; + cur -= extra; + len = extra; + if (extra == 1) + { + p->opt[wr].dist = dist; + dist = MARK_LIT; + } + else { - p->opt[posMem - 1].prev1IsChar = False; - p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; - p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; + p->opt[wr].dist = 0; + len--; + wr--; + p->opt[wr].dist = MARK_LIT; + p->opt[wr].len = 1; } } + + if (cur == 0) { - UInt32 posPrev = posMem; - UInt32 backCur = backMem; - - backMem = p->opt[posPrev].backPrev; - posMem = p->opt[posPrev].posPrev; - - p->opt[posPrev].backPrev = backCur; - p->opt[posPrev].posPrev = cur; - cur = posPrev; + p->backRes = dist; + p->optCur = wr; + return len; } + + wr--; + p->opt[wr].dist = dist; + p->opt[wr].len = len; } - while (cur != 0); - *backRes = p->opt[0].backPrev; - p->optimumCurrentIndex = p->opt[0].posPrev; - return p->optimumCurrentIndex; } -#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * (UInt32)0x300) -static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) -{ - UInt32 lenEnd, cur; - UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; - UInt32 *matches; - { +#define LIT_PROBS(pos, prevByte) \ + (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & 
p->lpMask) << p->lc)) - UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, len; - UInt32 matchPrice, repMatchPrice, normalMatchPrice; - const Byte *data; - Byte curByte, matchByte; - if (p->optimumEndIndex != p->optimumCurrentIndex) - { - const COptimal *opt = &p->opt[p->optimumCurrentIndex]; - UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; - *backRes = opt->backPrev; - p->optimumCurrentIndex = opt->posPrev; - return lenRes; - } - p->optimumCurrentIndex = p->optimumEndIndex = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) - { - *backRes = (UInt32)(-1); - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; +static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) +{ + unsigned last, cur; + UInt32 reps[LZMA_NUM_REPS]; + unsigned repLens[LZMA_NUM_REPS]; + UInt32 *matches; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 lenTest; - const Byte *data2; - reps[i] = p->reps[i]; - data2 = data - reps[i] - 1; - if (data[0] != data2[0] || data[1] != data2[1]) + UInt32 numAvail; + unsigned numPairs, mainLen, repMaxIndex, i, posState; + UInt32 matchPrice, repMatchPrice; + const Byte *data; + Byte curByte, matchByte; + + p->optCur = p->optEnd = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else { - repLens[i] = 0; - continue; + mainLen = p->longestMatchLen; + numPairs = p->numPairs; } - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); - repLens[i] = lenTest; - if (lenTest > repLens[repMaxIndex]) - repMaxIndex = i; - } - if (repLens[repMaxIndex] >= p->numFastBytes) - { - UInt32 lenRes; - *backRes = repMaxIndex; - lenRes = repLens[repMaxIndex]; - MovePos(p, lenRes - 1); - return lenRes; - } - - matches = 
p->matches; - if (mainLen >= p->numFastBytes) - { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) - { - *backRes = (UInt32)-1; - return 1; - } - - p->opt[0].state = (CState)p->state; - - posState = (position & p->pbMask); - - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsCharState(p->state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAsChar(&p->opt[1]); - - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); - - if (matchByte == curByte) - { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) + + numAvail = p->numAvail; + if (numAvail < 2) { - p->opt[1].price = shortRepPrice; - MakeAsShortRep(&p->opt[1]); + p->backRes = MARK_LIT; + return 1; } - } - lenEnd = ((mainLen >= repLens[repMaxIndex]) ? 
mainLen : repLens[repMaxIndex]); - - if (lenEnd < 2) - { - *backRes = p->opt[1].backPrev; - return 1; - } - - p->opt[1].posPrev = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->opt[0].backs[i] = reps[i]; - - len = lenEnd; - do - p->opt[len--].price = kInfinityPrice; - while (len >= 2); - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 repLen = repLens[i]; - UInt32 price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); - do + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; - COptimal *opt = &p->opt[repLen]; - if (curAndLenPrice < opt->price) + unsigned len; + const Byte *data2; + reps[i] = p->reps[i]; + data2 = data - reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = i; - opt->prev1IsChar = False; + repLens[i] = 0; + continue; } + for (len = 2; len < numAvail && data[len] == data2[len]; len++); + repLens[i] = len; + if (len > repLens[repMaxIndex]) + repMaxIndex = i; } - while (--repLen >= 2); - } - - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - - len = ((repLens[0] >= 2) ? 
repLens[0] + 1 : 2); - if (len <= mainLen) - { - UInt32 offs = 0; - while (len > matches[offs]) - offs += 2; - for (; ; len++) + + if (repLens[repMaxIndex] >= p->numFastBytes) { - COptimal *opt; - UInt32 distance = matches[offs + 1]; - - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(len); - if (distance < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][distance]; - else + unsigned len; + p->backRes = repMaxIndex; + len = repLens[repMaxIndex]; + MOVE_POS(p, len - 1) + return len; + } + + matches = p->matches; + + if (mainLen >= p->numFastBytes) + { + p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + curByte = *data; + matchByte = *(data - reps[0]); + + if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) + { + p->backRes = MARK_LIT; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsLitState(p->state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAs_Lit(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + if (matchByte == curByte) + { + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) { - UInt32 slot; - GetPosSlot2(distance, slot); - curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; + p->opt[1].price = shortRepPrice; + MakeAs_ShortRep(&p->opt[1]); } - opt = &p->opt[len]; - if (curAndLenPrice < opt->price) + } + + last = (mainLen >= repLens[repMaxIndex] ? 
mainLen : repLens[repMaxIndex]); + + if (last < 2) + { + p->backRes = p->opt[1].dist; + return 1; + } + + p->opt[1].len = 1; + + p->opt[0].reps[0] = reps[0]; + p->opt[0].reps[1] = reps[1]; + p->opt[0].reps[2] = reps[2]; + p->opt[0].reps[3] = reps[3]; + + { + unsigned len = last; + do + p->opt[len--].price = kInfinityPrice; + while (len >= 2); + } + + // ---------- REP ---------- + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); + do { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = distance + LZMA_NUM_REPS; - opt->prev1IsChar = False; + UInt32 price2 = price + p->repLenEnc.prices[posState][(size_t)repLen - 2]; + COptimal *opt = &p->opt[repLen]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = repLen; + opt->dist = i; + opt->extra = 0; + } } - if (len == matches[offs]) + while (--repLen >= 2); + } + + + // ---------- MATCH ---------- + { + unsigned len = ((repLens[0] >= 2) ? 
repLens[0] + 1 : 2); + if (len <= mainLen) { - offs += 2; - if (offs == numPairs) - break; + unsigned offs = 0; + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + while (len > matches[offs]) + offs += 2; + + for (; ; len++) + { + COptimal *opt; + UInt32 dist = matches[(size_t)offs + 1]; + UInt32 price2 = normalMatchPrice + p->lenEnc.prices[posState][(size_t)len - LZMA_MATCH_LEN_MIN]; + unsigned lenToPosState = GetLenToPosState(len); + + if (dist < kNumFullDistances) + price2 += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + { + unsigned slot; + GetPosSlot2(dist, slot); + price2 += p->alignPrices[dist & kAlignMask]; + price2 += p->posSlotPrices[lenToPosState][slot]; + } + + opt = &p->opt[len]; + + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + + if (len == matches[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } } } - } + - cur = 0; + cur = 0; #ifdef SHOW_STAT2 /* if (position >= 0) */ { unsigned i; printf("\n pos = %4X", position); - for (i = cur; i <= lenEnd; i++) + for (i = cur; i <= last; i++) printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); } #endif + } + - } + + // ---------- Optimal Parsing ---------- for (;;) { - UInt32 numAvail; - UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; - UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; - Bool nextIsChar; + UInt32 numAvail, numAvailFull; + unsigned newLen, numPairs, prev, state, posState, startLen; + UInt32 curPrice, litPrice, matchPrice, repMatchPrice; + Bool nextIsLit; Byte curByte, matchByte; const Byte *data; - COptimal *curOpt; - COptimal *nextOpt; + COptimal *curOpt, *nextOpt; - cur++; - if (cur == lenEnd) - return Backward(p, backRes, cur); + if (++cur == last) + return Backward(p, cur); newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) { p->numPairs = numPairs; - 
p->longestMatchLength = newLen; - return Backward(p, backRes, cur); + p->longestMatchLen = newLen; + return Backward(p, cur); } - position++; + curOpt = &p->opt[cur]; - posPrev = curOpt->posPrev; - if (curOpt->prev1IsChar) - { - posPrev--; - if (curOpt->prev2) - { - state = p->opt[curOpt->posPrev2].state; - if (curOpt->backPrev2 < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - else - state = p->opt[posPrev].state; - state = kLiteralNextStates[state]; - } - else - state = p->opt[posPrev].state; - if (posPrev == cur - 1) + prev = cur - curOpt->len; + + if (curOpt->len == 1) { + state = p->opt[prev].state; if (IsShortRep(curOpt)) state = kShortRepNextStates[state]; else @@ -1234,92 +1310,136 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) } else { - UInt32 pos; const COptimal *prevOpt; - if (curOpt->prev1IsChar && curOpt->prev2) + UInt32 b0; + UInt32 dist = curOpt->dist; + + if (curOpt->extra) { - posPrev = curOpt->posPrev2; - pos = curOpt->backPrev2; - state = kRepNextStates[state]; + prev -= curOpt->extra; + state = kState_RepAfterLit; + if (curOpt->extra == 1) + state = (dist < LZMA_NUM_REPS) ? 
kState_RepAfterLit : kState_MatchAfterLit; } else { - pos = curOpt->backPrev; - if (pos < LZMA_NUM_REPS) + state = p->opt[prev].state; + if (dist < LZMA_NUM_REPS) state = kRepNextStates[state]; else state = kMatchNextStates[state]; } - prevOpt = &p->opt[posPrev]; - if (pos < LZMA_NUM_REPS) + + prevOpt = &p->opt[prev]; + b0 = prevOpt->reps[0]; + + if (dist < LZMA_NUM_REPS) { - UInt32 i; - reps[0] = prevOpt->backs[pos]; - for (i = 1; i <= pos; i++) - reps[i] = prevOpt->backs[i - 1]; - for (; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i]; + if (dist == 0) + { + reps[0] = b0; + reps[1] = prevOpt->reps[1]; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[1] = b0; + b0 = prevOpt->reps[1]; + if (dist == 1) + { + reps[0] = b0; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[2] = b0; + reps[0] = prevOpt->reps[dist]; + reps[3] = prevOpt->reps[dist ^ 1]; + } + } } else { - UInt32 i; - reps[0] = (pos - LZMA_NUM_REPS); - for (i = 1; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i - 1]; + reps[0] = (dist - LZMA_NUM_REPS + 1); + reps[1] = b0; + reps[2] = prevOpt->reps[1]; + reps[3] = prevOpt->reps[2]; } } + curOpt->state = (CState)state; + curOpt->reps[0] = reps[0]; + curOpt->reps[1] = reps[1]; + curOpt->reps[2] = reps[2]; + curOpt->reps[3] = reps[3]; - curOpt->backs[0] = reps[0]; - curOpt->backs[1] = reps[1]; - curOpt->backs[2] = reps[2]; - curOpt->backs[3] = reps[3]; - - curPrice = curOpt->price; - nextIsChar = False; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; curByte = *data; - matchByte = *(data - (reps[0] + 1)); + matchByte = *(data - reps[0]); + position++; posState = (position & p->pbMask); - curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - curAnd1Price += - (!IsCharState(state) ? 
- LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } + /* + The order of Price checks: + < LIT + <= SHORT_REP + < LIT : REP_0 + < REP [ : LIT : REP_0 ] + < MATCH [ : LIT : REP_0 ] + */ + + curPrice = curOpt->price; + litPrice = curPrice + GET_PRICE_0(p->isMatch[state][posState]); - nextOpt = &p->opt[cur + 1]; + nextOpt = &p->opt[(size_t)cur + 1]; + nextIsLit = False; - if (curAnd1Price < nextOpt->price) + // if (litPrice >= nextOpt->price) litPrice = 0; else // 18.new { - nextOpt->price = curAnd1Price; - nextOpt->posPrev = cur; - MakeAsChar(nextOpt); - nextIsChar = True; + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + litPrice += (!IsLitState(state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + + if (litPrice < nextOpt->price) + { + nextOpt->price = litPrice; + nextOpt->len = 1; + MakeAs_Lit(nextOpt); + nextIsLit = True; + } } matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) + // ---------- SHORT_REP ---------- + // if (IsLitState(state)) // 18.new + if (matchByte == curByte) + // if (repMatchPrice < nextOpt->price) // 18.new + if (nextOpt->len < 2 + || (nextOpt->dist != 0 + && nextOpt->extra <= 1 // 17.old + )) { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); - if (shortRepPrice <= nextOpt->price) + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); + if (shortRepPrice <= nextOpt->price) // 17.old + // if (shortRepPrice < nextOpt->price) // 18.new { nextOpt->price = shortRepPrice; - nextOpt->posPrev = cur; - MakeAsShortRep(nextOpt); - nextIsChar = True; + nextOpt->len = 1; + MakeAs_ShortRep(nextOpt); + nextIsLit = False; } } + numAvailFull = p->numAvail; { UInt32 temp = kNumOpts - 1 - cur; 
- if (temp < numAvailFull) + if (numAvailFull > temp) numAvailFull = temp; } @@ -1327,41 +1447,53 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) continue; numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - if (!nextIsChar && matchByte != curByte) /* speed optimization */ + // numAvail <= p->numFastBytes + + // ---------- LIT : REP_0 ---------- + + if ( + // litPrice != 0 && // 18.new + !nextIsLit + && matchByte != curByte + && numAvailFull > 2) { - /* try Literal + rep0 */ - UInt32 temp; - UInt32 lenTest2; - const Byte *data2 = data - reps[0] - 1; - UInt32 limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; - - for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++); - lenTest2 = temp - 1; - if (lenTest2 >= 2) + const Byte *data2 = data - reps[0]; + if (data[1] == data2[1] && data[2] == data2[2]) { - UInt32 state2 = kLiteralNextStates[state]; - UInt32 posStateNext = (position + 1) & p->pbMask; - UInt32 nextRepMatchPrice = curAnd1Price + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - /* for (; lenTest2 >= 2; lenTest2--) */ + unsigned len; + unsigned limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + for (len = 3; len < limit && data[len] == data2[len]; len++); + { - UInt32 curAndLenPrice; - COptimal *opt; - UInt32 offset = cur + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + unsigned state2 = kLiteralNextStates[state]; + unsigned posState2 = (position + 1) & p->pbMask; + UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); { - opt->price = curAndLenPrice; - opt->posPrev = cur + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = False; + unsigned offset = cur + len; + while (last < offset) + 
p->opt[++last].price = kInfinityPrice; + + // do + { + UInt32 price2; + COptimal *opt; + len--; + // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2); + price2 = price + p->repLenEnc.prices[posState2][len - LZMA_MATCH_LEN_MIN]; + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len; + opt->dist = 0; + opt->extra = 1; + } + } + // while (len >= 3); } } } @@ -1369,87 +1501,105 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) startLen = 2; /* speed optimization */ { - UInt32 repIndex; - for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) - { - UInt32 lenTest; - UInt32 lenTestTemp; - UInt32 price; - const Byte *data2 = data - reps[repIndex] - 1; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); - while (lenEnd < cur + lenTest) - p->opt[++lenEnd].price = kInfinityPrice; - lenTestTemp = lenTest; - price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); - do + // ---------- REP ---------- + unsigned repIndex = 0; // 17.old + // unsigned repIndex = IsLitState(state) ? 
0 : 1; // 18.notused + for (; repIndex < LZMA_NUM_REPS; repIndex++) { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; - COptimal *opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) + unsigned len; + UInt32 price; + const Byte *data2 = data - reps[repIndex]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + + for (len = 2; len < numAvail && data[len] == data2[len]; len++); + + // if (len < startLen) continue; // 18.new: speed optimization + + while (last < cur + len) + p->opt[++last].price = kInfinityPrice; { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = repIndex; - opt->prev1IsChar = False; + unsigned len2 = len; + price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); + do + { + UInt32 price2 = price + p->repLenEnc.prices[posState][(size_t)len2 - 2]; + COptimal *opt = &p->opt[cur + len2]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len2; + opt->dist = repIndex; + opt->extra = 0; + } + } + while (--len2 >= 2); } - } - while (--lenTest >= 2); - lenTest = lenTestTemp; - - if (repIndex == 0) - startLen = lenTest + 1; - /* if (_maxMode) */ + if (repIndex == 0) startLen = len + 1; // 17.old + // startLen = len + 1; // 18.new + + /* if (_maxMode) */ { - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; + // ---------- REP : LIT : REP_0 ---------- + // numFastBytes + 1 + numFastBytes + + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; if (limit > numAvailFull) limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) + + for (; len2 < limit && data[len2] == data2[len2]; len2++); + + len2 -= len; + if (len2 >= 3) { - UInt32 nextRepMatchPrice; - UInt32 state2 = kRepNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = - price + p->repLenEnc.prices[posState][lenTest - 
2] + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (position + lenTest + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); + unsigned state2 = kRepNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + price += + p->repLenEnc.prices[posState][(size_t)len - 2] + + GET_PRICE_0(p->isMatch[state2][posState2]) + + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); - /* for (; lenTest2 >= 2; lenTest2--) */ + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterRep; + posState2 = (posState2 + 1) & p->pbMask; + + + price += GetPrice_Rep_0(p, state2, posState2); { - UInt32 curAndLenPrice; - COptimal *opt; - UInt32 offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + unsigned offset = cur + len + len2; + while (last < offset) + p->opt[++last].price = kInfinityPrice; + // do { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = repIndex; + unsigned price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + p->repLenEnc.prices[posState2][len2 - LZMA_MATCH_LEN_MIN]; + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len2; + opt->extra = (CExtra)(len + 1); + opt->dist = repIndex; + } } + // while (len2 >= 3); } } } + } } - } - /* for (UInt32 lenTest = 2; lenTest <= 
newLen; lenTest++) */ + + + // ---------- MATCH ---------- + /* for (unsigned len = 2; len <= newLen; len++) */ if (newLen > numAvail) { newLen = numAvail; @@ -1457,134 +1607,148 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) matches[numPairs] = newLen; numPairs += 2; } + if (newLen >= startLen) { UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - UInt32 offs, curBack, posSlot; - UInt32 lenTest; - while (lenEnd < cur + newLen) - p->opt[++lenEnd].price = kInfinityPrice; + UInt32 dist; + unsigned offs, posSlot, len; + while (last < cur + newLen) + p->opt[++last].price = kInfinityPrice; offs = 0; while (startLen > matches[offs]) offs += 2; - curBack = matches[offs + 1]; - GetPosSlot2(curBack, posSlot); - for (lenTest = /*2*/ startLen; ; lenTest++) + dist = matches[(size_t)offs + 1]; + + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); + + for (len = /*2*/ startLen; ; len++) { - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; - { - UInt32 lenToPosState = GetLenToPosState(lenTest); - COptimal *opt; - if (curBack < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; - else - curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; - - opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) + UInt32 price = normalMatchPrice + p->lenEnc.prices[posState][(size_t)len - LZMA_MATCH_LEN_MIN]; { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = curBack + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } + COptimal *opt; + unsigned lenToPosState = len - 2; lenToPosState = GetLenToPosState2(lenToPosState); + if (dist < kNumFullDistances) + price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + price += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[dist & kAlignMask]; + + opt = &p->opt[cur + len]; + if (price < opt->price) + { + 
opt->price = price; + opt->len = len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } } - if (/*_maxMode && */lenTest == matches[offs]) + if (/*_maxMode && */ len == matches[offs]) { - /* Try Match + Literal + Rep0 */ - const Byte *data2 = data - curBack - 1; - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; + // MATCH : LIT : REP_0 + + const Byte *data2 = data - dist - 1; + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; if (limit > numAvailFull) limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) + + for (; len2 < limit && data[len2] == data2[len2]; len2++); + + len2 -= len; + + if (len2 >= 3) { - UInt32 nextRepMatchPrice; - UInt32 state2 = kMatchNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = curAndLenPrice + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (posStateNext + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ + unsigned state2 = kMatchNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + unsigned offset; + price += GET_PRICE_0(p->isMatch[state2][posState2]); + price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterMatch; + + posState2 = (posState2 + 1) & p->pbMask; + price += GetPrice_Rep_0(p, state2, posState2); + + offset = cur + len + len2; + while (last < offset) + p->opt[++last].price = kInfinityPrice; + // do { - UInt32 offset = cur + lenTest + 1 + lenTest2; - UInt32 
curAndLenPrice2; + UInt32 price2; COptimal *opt; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice2 = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + p->repLenEnc.prices[posState2][len2 - LZMA_MATCH_LEN_MIN]; opt = &p->opt[offset]; - if (curAndLenPrice2 < opt->price) + // offset--; + if (price2 < opt->price) { - opt->price = curAndLenPrice2; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = curBack + LZMA_NUM_REPS; + opt->price = price2; + opt->len = len2; + opt->extra = (CExtra)(len + 1); + opt->dist = dist + LZMA_NUM_REPS; } } + // while (len2 >= 3); } + offs += 2; if (offs == numPairs) break; - curBack = matches[offs + 1]; - if (curBack >= kNumFullDistances) - GetPosSlot2(curBack, posSlot); + dist = matches[(size_t)offs + 1]; + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); } } } } } + + #define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) -static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) + + +static unsigned GetOptimumFast(CLzmaEnc *p) { - UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; + UInt32 numAvail, mainDist; + unsigned mainLen, numPairs, repIndex, repLen, i; const Byte *data; - const UInt32 *matches; if (p->additionalOffset == 0) mainLen = ReadMatchDistances(p, &numPairs); else { - mainLen = p->longestMatchLength; + mainLen = p->longestMatchLen; numPairs = p->numPairs; } numAvail = p->numAvail; - *backRes = (UInt32)-1; + p->backRes = MARK_LIT; if (numAvail < 2) return 1; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len; - const Byte *data2 = data - p->reps[i] - 1; + unsigned len; 
+ const Byte *data2 = data - p->reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) continue; for (len = 2; len < numAvail && data[len] == data2[len]; len++); if (len >= p->numFastBytes) { - *backRes = i; - MovePos(p, len - 1); + p->backRes = i; + MOVE_POS(p, len - 1) return len; } if (len > repLen) @@ -1594,84 +1758,152 @@ static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) } } - matches = p->matches; if (mainLen >= p->numFastBytes) { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); + p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) return mainLen; } mainDist = 0; /* for GCC */ + if (mainLen >= 2) { - mainDist = matches[numPairs - 1]; - while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) + mainDist = p->matches[(size_t)numPairs - 1]; + while (numPairs > 2) { - if (!ChangePair(matches[numPairs - 3], mainDist)) + UInt32 dist2; + if (mainLen != p->matches[(size_t)numPairs - 4] + 1) + break; + dist2 = p->matches[(size_t)numPairs - 3]; + if (!ChangePair(dist2, mainDist)) break; numPairs -= 2; - mainLen = matches[numPairs - 2]; - mainDist = matches[numPairs - 1]; + mainLen--; + mainDist = dist2; } if (mainLen == 2 && mainDist >= 0x80) mainLen = 1; } - if (repLen >= 2 && ( - (repLen + 1 >= mainLen) || - (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || - (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) + if (repLen >= 2) + if ( repLen + 1 >= mainLen + || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) + || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) { - *backRes = repIndex; - MovePos(p, repLen - 1); + p->backRes = repIndex; + MOVE_POS(p, repLen - 1) return repLen; } if (mainLen < 2 || numAvail <= 2) return 1; - p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); - if (p->longestMatchLength >= 2) { - UInt32 newDistance = matches[p->numPairs - 1]; - if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || - (p->longestMatchLength == mainLen + 1 && 
!ChangePair(mainDist, newDistance)) || - (p->longestMatchLength > mainLen + 1) || - (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) - return 1; + unsigned len1 = ReadMatchDistances(p, &p->numPairs); + p->longestMatchLen = len1; + + if (len1 >= 2) + { + UInt32 newDist = p->matches[(size_t)p->numPairs - 1]; + if ( (len1 >= mainLen && newDist < mainDist) + || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist)) + || (len1 > mainLen + 1) + || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) + return 1; + } } data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len, limit; - const Byte *data2 = data - p->reps[i] - 1; + unsigned len, limit; + const Byte *data2 = data - p->reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) continue; limit = mainLen - 1; - for (len = 2; len < limit && data[len] == data2[len]; len++); - if (len >= limit) - return 1; + for (len = 2;; len++) + { + if (len >= limit) + return 1; + if (data[len] != data2[len]) + break; + } + } + + p->backRes = mainDist + LZMA_NUM_REPS; + if (mainLen != 2) + { + MOVE_POS(p, mainLen - 2) } - *backRes = mainDist + LZMA_NUM_REPS; - MovePos(p, mainLen - 2); return mainLen; } -static void WriteEndMarker(CLzmaEnc *p, UInt32 posState) + + + +static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { - UInt32 len; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + UInt32 range; + range = p->rc.range; + { + UInt32 ttt, newBound; + CLzmaProb *prob = &p->isMatch[p->state][posState]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_1(&p->rc, prob) + prob = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_0(&p->rc, prob) + } p->state = kMatchNextStates[p->state]; - len = LZMA_MATCH_LEN_MIN; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - RcTree_Encode(&p->rc, 
p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); - RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + + p->rc.range = range; + LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); + range = p->rc.range; + + { + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1); + CLzmaProb *probs = p->posSlotEncoder[0]; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < (1 << kNumPosSlotBits)); + } + { + // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range; + unsigned numBits = 30 - kNumAlignBits; + do + { + range >>= 1; + p->rc.low += range; + RC_NORM(&p->rc) + } + while (--numBits); + } + + { + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + CLzmaProb *probs = p->posAlignEncoder; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < kAlignTableSize); + } + p->rc.range = range; } + static SRes CheckErrors(CLzmaEnc *p) { if (p->result != SZ_OK) @@ -1685,7 +1917,8 @@ static SRes CheckErrors(CLzmaEnc *p) return p->result; } -static SRes Flush(CLzmaEnc *p, UInt32 nowPos) + +MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; @@ -1696,47 +1929,108 @@ static SRes Flush(CLzmaEnc *p, UInt32 nowPos) return CheckErrors(p); } + + static void FillAlignPrices(CLzmaEnc *p) { - UInt32 i; - for (i = 0; i < kAlignTableSize; i++) - p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + unsigned i; + const CProbPrice *ProbPrices = p->ProbPrices; + const CLzmaProb *probs = p->posAlignEncoder; p->alignPriceCount = 0; + for (i = 
0; i < kAlignTableSize / 2; i++) + { + UInt32 price = 0; + unsigned symbol = i; + unsigned m = 1; + unsigned bit; + UInt32 prob; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + prob = probs[m]; + p->alignPrices[i ] = price + GET_PRICEa_0(prob); + p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); + // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + } } + static void FillDistancesPrices(CLzmaEnc *p) { UInt32 tempPrices[kNumFullDistances]; - UInt32 i, lenToPosState; + unsigned i, lenToPosState; + + const CProbPrice *ProbPrices = p->ProbPrices; + p->matchPriceCount = 0; + for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { - UInt32 posSlot = GetPosSlot1(i); - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); + unsigned posSlot = GetPosSlot1(i); + unsigned footerBits = ((posSlot >> 1) - 1); + unsigned base = ((2 | (posSlot & 1)) << footerBits); + // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); + + const CLzmaProb *probs = p->posEncoders + base; + UInt32 price = 0; + unsigned m = 1; + unsigned symbol = i - base; + do + { + unsigned bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) + bit; + } + while (--footerBits); + tempPrices[i] = price; } for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { - UInt32 posSlot; + unsigned posSlot; const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState]; - for (posSlot = 0; posSlot < p->distTableSize; posSlot++) - 
posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); - for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + unsigned distTableSize = p->distTableSize; + const CLzmaProb *probs = encoder; + for (posSlot = 0; posSlot < distTableSize; posSlot += 2) + { + // posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); + UInt32 price = 0; + unsigned bit; + unsigned symbol = (posSlot >> 1) + (1 << (kNumPosSlotBits - 1)); + UInt32 prob; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + prob = probs[(posSlot >> 1) + (1 << (kNumPosSlotBits - 1))]; + posSlotPrices[posSlot ] = price + GET_PRICEa_0(prob); + posSlotPrices[posSlot + 1] = price + GET_PRICEa_1(prob); + } + for (posSlot = kEndPosModelIndex; posSlot < distTableSize; posSlot++) + posSlotPrices[posSlot] += ((UInt32)(((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); { UInt32 *distancesPrices = p->distancesPrices[lenToPosState]; - for (i = 0; i < kStartPosModelIndex; i++) - distancesPrices[i] = posSlotPrices[i]; - for (; i < kNumFullDistances; i++) - distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + { + distancesPrices[0] = posSlotPrices[0]; + distancesPrices[1] = posSlotPrices[1]; + distancesPrices[2] = posSlotPrices[2]; + distancesPrices[3] = posSlotPrices[3]; + } + for (i = 4; i < kNumFullDistances; i += 2) + { + UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; + distancesPrices[i ] = slotPrice + tempPrices[i]; + distancesPrices[i + 1] = slotPrice + tempPrices[i + 1]; + } } } - 
p->matchPriceCount = 0; } + + void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); @@ -1760,26 +2054,27 @@ void LzmaEnc_Construct(CLzmaEnc *p) LzmaEnc_InitPriceTables(p->ProbPrices); p->litProbs = NULL; p->saveState.litProbs = NULL; + } -CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc) +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) { void *p; - p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); + p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); if (p) LzmaEnc_Construct((CLzmaEnc *)p); return p; } -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc) +void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->litProbs); - alloc->Free(alloc, p->saveState.litProbs); + ISzAlloc_Free(alloc, p->litProbs); + ISzAlloc_Free(alloc, p->saveState.litProbs); p->litProbs = NULL; p->saveState.litProbs = NULL; } -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) +void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { #ifndef _7ZIP_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); @@ -1790,13 +2085,14 @@ void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) RangeEnc_Free(&p->rc, alloc); } -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig) +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); - alloc->Free(alloc, p); + ISzAlloc_Free(alloc, p); } -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) + +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->needInit) @@ -1814,13 +2110,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize if (p->nowPos64 == 0) { - UInt32 numPairs; + unsigned numPairs; Byte curByte; if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) return Flush(p, nowPos32); ReadMatchDistances(p, &numPairs); - 
RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); - p->state = kLiteralNextStates[p->state]; + RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); + // p->state = kLiteralNextStates[p->state]; curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); LitEnc_Encode(&p->rc, p->litProbs, curByte); p->additionalOffset--; @@ -1828,109 +2124,225 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize } if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + for (;;) { - UInt32 pos, len, posState; - + UInt32 dist; + unsigned len, posState; + UInt32 range, ttt, newBound; + CLzmaProb *probs; + if (p->fastMode) - len = GetOptimumFast(p, &pos); + len = GetOptimumFast(p); else - len = GetOptimum(p, nowPos32, &pos); + { + unsigned oci = p->optCur; + if (p->optEnd == oci) + len = GetOptimum(p, nowPos32); + else + { + const COptimal *opt = &p->opt[oci]; + len = opt->len; + p->backRes = opt->dist; + p->optCur = oci + 1; + } + } + + posState = (unsigned)nowPos32 & p->pbMask; + range = p->rc.range; + probs = &p->isMatch[p->state][posState]; + + RC_BIT_PRE(&p->rc, probs) + + dist = p->backRes; #ifdef SHOW_STAT2 - printf("\n pos = %4X, len = %u pos = %u", nowPos32, len, pos); + printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); #endif - posState = nowPos32 & p->pbMask; - if (len == 1 && pos == (UInt32)-1) + if (dist == MARK_LIT) { Byte curByte; - CLzmaProb *probs; const Byte *data; + unsigned state; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); + RC_BIT_0(&p->rc, probs); + p->rc.range = range; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - curByte = *data; probs = LIT_PROBS(nowPos32, *(data - 1)); - if (IsCharState(p->state)) + curByte = *data; + state = p->state; + p->state = kLiteralNextStates[state]; + if (IsLitState(state)) LitEnc_Encode(&p->rc, probs, curByte); else - LitEnc_EncodeMatched(&p->rc, probs, curByte, 
*(data - p->reps[0] - 1)); - p->state = kLiteralNextStates[p->state]; + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); } else { - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - if (pos < LZMA_NUM_REPS) + RC_BIT_1(&p->rc, probs); + probs = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, probs) + + if (dist < LZMA_NUM_REPS) { - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); - if (pos == 0) + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG0[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 0) { - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); - RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); + RC_BIT_0(&p->rc, probs); + probs = &p->isRep0Long[p->state][posState]; + RC_BIT_PRE(&p->rc, probs) + if (len != 1) + { + RC_BIT_1_BASE(&p->rc, probs); + } + else + { + RC_BIT_0_BASE(&p->rc, probs); + p->state = kShortRepNextStates[p->state]; + } } else { - UInt32 distance = p->reps[pos]; - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); - if (pos == 1) - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG1[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 1) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[1]; + } else { - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); - if (pos == 3) + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG2[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 2) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[2]; + } + else + { + RC_BIT_1_BASE(&p->rc, probs); + dist = p->reps[3]; p->reps[3] = p->reps[2]; + } p->reps[2] = p->reps[1]; } p->reps[1] = p->reps[0]; - p->reps[0] = distance; + p->reps[0] = dist; } - if (len == 1) - p->state = kShortRepNextStates[p->state]; - else + + RC_NORM(&p->rc) + + p->rc.range = range; + + if (len != 1) { - LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, 
p->ProbPrices); + LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + if (!p->fastMode) + if (--p->repLenEnc.counters[posState] == 0) + LenPriceEnc_UpdateTable(&p->repLenEnc, posState, &p->repLenProbs, p->ProbPrices); + p->state = kRepNextStates[p->state]; } } else { - UInt32 posSlot; - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + unsigned posSlot; + RC_BIT_0(&p->rc, probs); + p->rc.range = range; p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - pos -= LZMA_NUM_REPS; - GetPosSlot(pos, posSlot); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); + + LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + if (!p->fastMode) + if (--p->lenEnc.counters[posState] == 0) + LenPriceEnc_UpdateTable(&p->lenEnc, posState, &p->lenProbs, p->ProbPrices); + + dist -= LZMA_NUM_REPS; + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = dist + 1; - if (posSlot >= kStartPosModelIndex) + p->matchPriceCount++; + GetPosSlot(dist, posSlot); + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); + { + UInt32 symbol = posSlot + (1 << kNumPosSlotBits); + range = p->rc.range; + probs = p->posSlotEncoder[GetLenToPosState(len)]; + do + { + CLzmaProb *prob = probs + (symbol >> kNumPosSlotBits); + UInt32 bit = (symbol >> (kNumPosSlotBits - 1)) & 1; + symbol <<= 1; + RC_BIT(&p->rc, prob, bit); + } + while (symbol < (1 << kNumPosSlotBits * 2)); + p->rc.range = range; + } + + if (dist >= kStartPosModelIndex) { - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - UInt32 posReduced = pos - base; + unsigned footerBits = ((posSlot >> 1) - 1); - if (posSlot < kEndPosModelIndex) - RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); + if (dist < 
kNumFullDistances) + { + unsigned base = ((2 | (posSlot & 1)) << footerBits); + RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, dist - base); + } else { - RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - p->alignPriceCount++; + UInt32 pos2 = (dist | 0xF) << (32 - footerBits); + range = p->rc.range; + // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + /* + do + { + range >>= 1; + p->rc.low += range & (0 - ((dist >> --footerBits) & 1)); + RC_NORM(&p->rc) + } + while (footerBits > kNumAlignBits); + */ + do + { + range >>= 1; + p->rc.low += range & (0 - (pos2 >> 31)); + pos2 += pos2; + RC_NORM(&p->rc) + } + while (pos2 != 0xF0000000); + + + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + + { + unsigned m = 1; + unsigned bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + p->rc.range = range; + p->alignPriceCount++; + } } } - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - p->reps[1] = p->reps[0]; - p->reps[0] = pos; - p->matchPriceCount++; } } - p->additionalOffset -= len; + nowPos32 += len; + p->additionalOffset -= len; + if (p->additionalOffset == 0) { UInt32 processed; + if (!p->fastMode) { if (p->matchPriceCount >= (1 << 7)) @@ -1938,13 +2350,15 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize if (p->alignPriceCount >= kAlignTableSize) FillAlignPrices(p); } + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) break; processed = nowPos32 - startPos32; - if (useLimits) + + 
if (maxPackSize) { - if (processed + kNumOpts + 300 >= maxUnpackSize || - RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) + if (processed + kNumOpts + 300 >= maxUnpackSize + || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) break; } else if (processed >= (1 << 17)) @@ -1954,13 +2368,16 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize } } } + p->nowPos64 += nowPos32 - startPos32; return Flush(p, nowPos32); } + + #define kBigHashDicLimit ((UInt32)1 << 24) -static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { UInt32 beforeSize = kNumOpts; if (!RangeEnc_Alloc(&p->rc, alloc)) @@ -1975,8 +2392,8 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, I if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); if (!p->litProbs || !p->saveState.litProbs) { LzmaEnc_FreeLits(p, alloc); @@ -1994,8 +2411,13 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, I #ifndef _7ZIP_ST if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); + RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, + LZMA_MATCH_LEN_MAX + + 1 /* 18.04 */ + , allocBig)); p->matchFinderObj = &p->matchFinderMt; + p->matchFinderBase.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && 
p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else @@ -2012,17 +2434,21 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, I void LzmaEnc_Init(CLzmaEnc *p) { - UInt32 i; + unsigned i; p->state = 0; - for (i = 0 ; i < LZMA_NUM_REPS; i++) - p->reps[i] = 0; + p->reps[0] = + p->reps[1] = + p->reps[2] = + p->reps[3] = 1; RangeEnc_Init(&p->rc); + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; for (i = 0; i < kNumStates; i++) { - UInt32 j; + unsigned j; for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { p->isMatch[i][j] = kProbInitValue; @@ -2034,39 +2460,38 @@ void LzmaEnc_Init(CLzmaEnc *p) p->isRepG2[i] = kProbInitValue; } - { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - CLzmaProb *probs = p->litProbs; - for (i = 0; i < num; i++) - probs[i] = kProbInitValue; - } - { for (i = 0; i < kNumLenToPosStates; i++) { CLzmaProb *probs = p->posSlotEncoder[i]; - UInt32 j; + unsigned j; for (j = 0; j < (1 << kNumPosSlotBits); j++) probs[j] = kProbInitValue; } } { - for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + for (i = 0; i < kNumFullDistances; i++) p->posEncoders[i] = kProbInitValue; } - LenEnc_Init(&p->lenEnc.p); - LenEnc_Init(&p->repLenEnc.p); + { + UInt32 num = (UInt32)0x300 << (p->lp + p->lc); + UInt32 k; + CLzmaProb *probs = p->litProbs; + for (k = 0; k < num; k++) + probs[k] = kProbInitValue; + } - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - p->optimumEndIndex = 0; - p->optimumCurrentIndex = 0; + LenEnc_Init(&p->lenProbs); + LenEnc_Init(&p->repLenProbs); + + p->optEnd = 0; + p->optCur = 0; p->additionalOffset = 0; p->pbMask = (1 << p->pb) - 1; - p->lpMask = (1 << p->lp) - 1; + p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); } void LzmaEnc_InitPrices(CLzmaEnc *p) @@ -2080,14 +2505,14 @@ void LzmaEnc_InitPrices(CLzmaEnc *p) p->lenEnc.tableSize = 
p->repLenEnc.tableSize = p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); } -static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) +static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - UInt32 i; - for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) + unsigned i; + for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) if (p->dictSize <= ((UInt32)1 << i)) break; p->distTableSize = i * 2; @@ -2102,7 +2527,7 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *a } static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, - ISzAlloc *alloc, ISzAlloc *allocBig) + ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; p->matchFinderBase.stream = inStream; @@ -2113,7 +2538,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, - ISzAlloc *alloc, ISzAlloc *allocBig) + ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; p->matchFinderBase.stream = inStream; @@ -2129,12 +2554,13 @@ static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) } SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; LzmaEnc_SetInputBuf(p, src, srcLen); p->needInit = 1; + LzmaEnc_SetDataSize(pp, srcLen); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } 
@@ -2152,15 +2578,15 @@ void LzmaEnc_Finish(CLzmaEncHandle pp) typedef struct { - ISeqOutStream funcTable; + ISeqOutStream vt; Byte *data; SizeT rem; Bool overflow; -} CSeqOutStreamBuf; +} CLzmaEnc_SeqOutStreamBuf; -static size_t MyWrite(void *pp, const void *data, size_t size) +static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) { - CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp; + CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); if (p->rem < size) { size = p->rem; @@ -2193,9 +2619,9 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, CLzmaEnc *p = (CLzmaEnc *)pp; UInt64 nowPos64; SRes res; - CSeqOutStreamBuf outStream; + CLzmaEnc_SeqOutStreamBuf outStream; - outStream.funcTable.Write = MyWrite; + outStream.vt.Write = SeqOutStreamBuf_Write; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; @@ -2207,11 +2633,15 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, if (reInit) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); + nowPos64 = p->nowPos64; RangeEnc_Init(&p->rc); - p->rc.outStream = &outStream.funcTable; + p->rc.outStream = &outStream.vt; + + if (desiredPackSize == 0) + return SZ_ERROR_OUTPUT_EOF; - res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); + res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); *unpackSize = (UInt32)(p->nowPos64 - nowPos64); *destLen -= outStream.rem; @@ -2234,12 +2664,12 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) for (;;) { - res = LzmaEnc_CodeOneBlock(p, False, 0, 0); + res = LzmaEnc_CodeOneBlock(p, 0, 0); if (res != SZ_OK || p->finished) break; if (progress) { - res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); if (res != SZ_OK) { res = SZ_ERROR_PROGRESS; @@ -2251,7 +2681,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) 
LzmaEnc_Finish(p); /* - if (res == S_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) + if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) res = SZ_ERROR_FAIL; } */ @@ -2261,7 +2691,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, - ISzAlloc *alloc, ISzAlloc *allocBig) + ISzAllocPtr alloc, ISzAllocPtr allocBig) { RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); @@ -2296,21 +2726,27 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) } +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +{ + return ((CLzmaEnc *)pp)->writeEndMark; +} + + SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { SRes res; CLzmaEnc *p = (CLzmaEnc *)pp; - CSeqOutStreamBuf outStream; + CLzmaEnc_SeqOutStreamBuf outStream; - outStream.funcTable.Write = MyWrite; + outStream.vt.Write = SeqOutStreamBuf_Write; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; p->writeEndMark = writeEndMark; - p->rc.outStream = &outStream.funcTable; + p->rc.outStream = &outStream.vt; res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); @@ -2330,7 +2766,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); SRes res; 
diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.h index c2806b45f4..c9938f04bc 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2013-01-18 : Igor Pavlov : Public domain */ +2017-07-27 : Igor Pavlov : Public domain */ #ifndef __LZMA_ENC_H #define __LZMA_ENC_H @@ -12,12 +12,10 @@ EXTERN_C_BEGIN typedef struct _CLzmaEncProps { - int level; /* 0 <= level <= 9 */ + int level; /* 0 <= level <= 9 */ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (1 << 30) for 64-bit version - default = (1 << 24) */ - UInt64 reduceSize; /* estimated size of data that will be compressed. default = 0xFFFFFFFF. - Encoder uses this value to reduce dictionary size */ + (1 << 12) <= dictSize <= (3 << 29) for 64-bit version + default = (1 << 24) */ int lc; /* 0 <= lc <= 8, default = 3 */ int lp; /* 0 <= lp <= 4, default = 0 */ int pb; /* 0 <= pb <= 4, default = 2 */ @@ -25,9 +23,12 @@ typedef struct _CLzmaEncProps int fb; /* 5 <= fb <= 273, default = 32 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int numHashBytes; /* 2, 3 or 4, default = 4 */ - UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ int numThreads; /* 1 or 2, default = 2 */ + + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. 
+ Encoder uses this value to reduce dictionary size */ } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); @@ -37,41 +38,38 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); /* ---------- CLzmaEncHandle Interface ---------- */ -/* LzmaEnc_* functions can return the following exit codes: -Returns: +/* LzmaEnc* functions can return the following exit codes: +SRes: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - Write callback error. + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ typedef void * CLzmaEncHandle; -CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig); +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); + SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); + SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); -/* ---------- One Call Interface ---------- */ -/* 
LzmaEncode -Return code: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater - SZ_ERROR_OUTPUT_EOF - output buffer overflow - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ +/* ---------- One Call Interface ---------- */ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); EXTERN_C_END diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.c b/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.c index ece07e618a..8fd86f224b 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.c +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2014-09-21 : Igor Pavlov : Public domain */ +2017-06-26 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,18 +12,20 @@ static WRes GetError() { DWORD res = GetLastError(); - return (res) ? (WRes)(res) : 1; + return res ? (WRes)res : 1; } -WRes HandleToWRes(HANDLE h) { return (h != 0) ? 0 : GetError(); } -WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); } +static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); } +static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); } WRes HandlePtr_Close(HANDLE *p) { if (*p != NULL) + { if (!CloseHandle(*p)) return GetError(); - *p = NULL; + *p = NULL; + } return 0; } @@ -49,7 +51,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) return HandleToWRes(*p); } -WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) +static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) { *p = CreateEvent(NULL, manualReset, (signaled ? 
TRUE : FALSE), NULL); return HandleToWRes(*p); diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.h b/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.h index e927208d79..f913241aea 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.h +++ b/BaseTools/Source/C/LzmaCompress/Sdk/C/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2013-11-12 : Igor Pavlov : Public domain */ +2017-06-18 : Igor Pavlov : Public domain */ #ifndef __7Z_THREADS_H #define __7Z_THREADS_H @@ -49,7 +49,8 @@ WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); typedef HANDLE CSemaphore; -#define Semaphore_Construct(p) (*p) = NULL +#define Semaphore_Construct(p) *(p) = NULL +#define Semaphore_IsCreated(p) (*(p) != NULL) #define Semaphore_Close(p) HandlePtr_Close(p) #define Semaphore_Wait(p) Handle_WaitObject(*(p)) WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-history.txt b/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-history.txt index 7aaeb07ff4..f4793264a5 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-history.txt +++ b/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-history.txt @@ -1,6 +1,67 @@ HISTORY of the LZMA SDK ----------------------- +18.05 2018-04-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased + by 8% for fastest/fast compression levels and + by 3% for normal/maximum compression levels. +- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in + Windows 10 because of some BUG with "Large Pages" in Windows 10. + Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299). +- The BUG was fixed in Lzma2Enc.c + Lzma2Enc_Encode2() function worked incorrectly, + if (inStream == NULL) and the number of block threads is more than 1.
+ + +18.03 beta 2018-03-04 +------------------------- +- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm + for x64 with about 30% higher speed than main version of LZMA decoder written in C. +- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%. +- 7-Zip now can use multi-threading for 7z/LZMA2 decoding, + if there are multiple independent data chunks in LZMA2 stream. +- 7-Zip now can use multi-threading for xz decoding, + if there are multiple blocks in xz stream. + + +18.01 2018-01-28 +------------------------- +- The BUG in 17.01 - 18.00 beta was fixed: + XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished() + didn't work correctly for xz archives without checksum (CRC). + + +18.00 beta 2018-01-10 +------------------------- +- The BUG in xz encoder was fixed: + There was memory leak of 16 KB for each file compressed with + xz compression method, if additional filter was used. + + +17.01 beta 2017-08-28 +------------------------- +- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression. + 7-Zip now uses additional memory buffers for multi-block LZMA2 compression. + CPU utilization was slightly improved. +- 7-Zip now creates multi-block xz archives by default. Block size can be + specified with -ms[Size]{m|g} switch. +- xz decoder now can unpack random block from multi-block xz archives. +- 7-Zip command line: @listfile now doesn't work after -- switch. + Use -i@listfile before -- switch instead. +- The BUGs were fixed: + 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive. + + +17.00 beta 2017-04-29 +------------------------- +- NewHandler.h / NewHandler.cpp: + now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900). +- C/7zTypes.h : the names of variables in interface structures were changed (vt). +- Some bugs were fixed. 7-Zip could crash in some cases. +- Some internal changes in code.
+ + 16.04 2016-10-04 ------------------------- - The bug was fixed in DllSecur.c. @@ -168,7 +229,7 @@ HISTORY of the LZMA SDK 4.57 2007-12-12 ------------------------- -- Speed optimizations in C++ LZMA Decoder. +- Speed optimizations in C++ LZMA Decoder. - Small changes for more compatibility with some C/C++ compilers. diff --git a/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-sdk.txt b/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-sdk.txt index 86fef248f4..01521e9398 100644 --- a/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-sdk.txt +++ b/BaseTools/Source/C/LzmaCompress/Sdk/DOC/lzma-sdk.txt @@ -1,4 +1,4 @@ -LZMA SDK 16.04 +LZMA SDK 18.05 -------------- LZMA SDK provides the documentation, samples, header files,