From 7290161c30cf06a4a0c087221e51bb65032055bd Mon Sep 17 00:00:00 2001 From: yeggor Date: Thu, 1 Feb 2024 19:36:31 +0000 Subject: [PATCH 1/2] sync lzma src with uefitool --- uefi_firmware/compression/LZMA/LzmaCompress.c | 80 +- uefi_firmware/compression/LZMA/LzmaCompress.h | 39 +- .../compression/LZMA/LzmaDecompress.c | 98 +- .../compression/LZMA/LzmaDecompress.h | 147 +- .../compression/LZMA/SDK/C/7zTypes.h | 529 +++ .../compression/LZMA/SDK/C/7zVersion.h | 32 +- uefi_firmware/compression/LZMA/SDK/C/Bra.c | 230 ++ uefi_firmware/compression/LZMA/SDK/C/Bra.h | 24 +- uefi_firmware/compression/LZMA/SDK/C/Bra86.c | 103 +- .../compression/LZMA/SDK/C/Compiler.h | 43 + .../compression/LZMA/SDK/C/CpuArch.c | 490 +++ .../compression/LZMA/SDK/C/CpuArch.h | 394 +- uefi_firmware/compression/LZMA/SDK/C/LzFind.c | 1567 ++++++-- uefi_firmware/compression/LZMA/SDK/C/LzFind.h | 135 +- uefi_firmware/compression/LZMA/SDK/C/LzHash.h | 64 +- .../compression/LZMA/SDK/C/LzmaDec.c | 996 +++-- .../compression/LZMA/SDK/C/LzmaDec.h | 71 +- .../compression/LZMA/SDK/C/LzmaEnc.c | 3350 +++++++++++------ .../compression/LZMA/SDK/C/LzmaEnc.h | 82 +- .../compression/LZMA/SDK/C/Precomp.h | 12 + uefi_firmware/compression/LZMA/SDK/C/Types.h | 268 -- uefi_firmware/compression/LZMA/UefiLzma.h | 31 + 22 files changed, 6146 insertions(+), 2639 deletions(-) create mode 100644 uefi_firmware/compression/LZMA/SDK/C/7zTypes.h create mode 100644 uefi_firmware/compression/LZMA/SDK/C/Bra.c create mode 100644 uefi_firmware/compression/LZMA/SDK/C/Compiler.h create mode 100644 uefi_firmware/compression/LZMA/SDK/C/CpuArch.c create mode 100644 uefi_firmware/compression/LZMA/SDK/C/Precomp.h delete mode 100644 uefi_firmware/compression/LZMA/SDK/C/Types.h create mode 100644 uefi_firmware/compression/LZMA/UefiLzma.h diff --git a/uefi_firmware/compression/LZMA/LzmaCompress.c b/uefi_firmware/compression/LZMA/LzmaCompress.c index dccea08..a8ee00b 100644 --- a/uefi_firmware/compression/LZMA/LzmaCompress.c +++ b/uefi_firmware/compression/LZMA/LzmaCompress.c @@ -1,5 +1,3 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check it. -// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /* LZMA Compress Implementation Copyright (c) 2012, Nikolaj Schlej. All rights reserved. @@ -14,44 +12,46 @@ WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. */ #include "LzmaCompress.h" -//#include "Sdk/C/7zVersion.h" +#include "SDK/C/7zVersion.h" #include "SDK/C/LzmaEnc.h" #include #define LZMA_HEADER_SIZE (LZMA_PROPS_SIZE + 8) -static void * AllocForLzma(void *p, size_t size) { return malloc(size); } -static void FreeForLzma(void *p, void *address) { free(address); } +static void * AllocForLzma(ISzAllocPtr p, size_t size) { (void)p; return malloc(size); } +static void FreeForLzma(ISzAllocPtr p, void *address) { (void)p; free(address); } static ISzAlloc SzAllocForLzma = { &AllocForLzma, &FreeForLzma }; -SRes OnProgress(void *p, UInt64 inSize, UInt64 outSize) +SRes OnProgress(const ICompressProgress *p, UInt64 inSize, UInt64 outSize) { + (void)p; (void)inSize; (void)outSize; return SZ_OK; } static ICompressProgress g_ProgressCallback = { &OnProgress }; STATIC - UINT64 - RShiftU64 ( - UINT64 Operand, - UINT32 Count - ) +UINT64 +EFIAPI +RShiftU64 ( + UINT64 Operand, + UINT32 Count +) { return Operand >> Count; } VOID - SetEncodedSizeOfBuf( +SetEncodedSizeOfBuf( UINT64 EncodedSize, - UINT8 *EncodedData - ) + UINT8* EncodedData +) { INT32 Index; - EncodedData[LZMA_PROPS_SIZE] = EncodedSize & 0xFF; - for (Index = LZMA_PROPS_SIZE+1; Index <= LZMA_PROPS_SIZE + 7; Index++) + EncodedData[LZMA_PROPS_SIZE] = EncodedSize & 0xFF; + for (Index = LZMA_PROPS_SIZE + 1; Index <= LZMA_PROPS_SIZE + 7; Index++) { EncodedSize = RShiftU64(EncodedSize, 8); EncodedData[Index] = EncodedSize & 0xFF; @@ -59,52 +59,52 @@ VOID } EFI_STATUS - LzmaCompress ( - IN UINT8 *Source, - IN SizeT SourceSize, - IN UINT8 *Destination, - IN OUT SizeT *DestinationSize - ) +EFIAPI +LzmaCompress ( + CONST UINT8 *Source, + UINT32 SourceSize, + UINT8 *Destination, + UINT32 *DestinationSize, + UINT32 DictionarySize +) { SRes LzmaResult; CLzmaEncProps props; SizeT propsSize = LZMA_PROPS_SIZE; SizeT destLen = SourceSize + SourceSize / 3 + 128; - if (*DestinationSize < destLen) + if (*DestinationSize < (UINT32)destLen) { - *DestinationSize = destLen; + *DestinationSize = (UINT32)destLen; return EFI_BUFFER_TOO_SMALL; } LzmaEncProps_Init(&props); - props.dictSize = LZMA_DICTIONARY_SIZE; + props.dictSize = DictionarySize; props.level = 9; props.fb = 273; LzmaResult = LzmaEncode( - (Byte*)((UINT8*)Destination + LZMA_HEADER_SIZE), - (SizeT*)&destLen, - (VOID*)Source, - SourceSize, - &props, - (UINT8*)Destination, - (SizeT*)&propsSize, + (Byte*)((UINT8*)Destination + LZMA_HEADER_SIZE), + &destLen, + Source, + (SizeT)SourceSize, + &props, + (UINT8*)Destination, + &propsSize, props.writeEndMark, - &g_ProgressCallback, - &SzAllocForLzma, + &g_ProgressCallback, + &SzAllocForLzma, &SzAllocForLzma); - *DestinationSize = destLen + LZMA_HEADER_SIZE; + *DestinationSize = (UINT32)(destLen + LZMA_HEADER_SIZE); - SetEncodedSizeOfBuf((UINT64)SourceSize, Destination); + SetEncodedSizeOfBuf(SourceSize, Destination); if (LzmaResult == SZ_OK) { return EFI_SUCCESS; - } else { + } + else { return EFI_INVALID_PARAMETER; } } - - - diff --git a/uefi_firmware/compression/LZMA/LzmaCompress.h b/uefi_firmware/compression/LZMA/LzmaCompress.h index 6a5711e..824c755 100644 --- a/uefi_firmware/compression/LZMA/LzmaCompress.h +++ b/uefi_firmware/compression/LZMA/LzmaCompress.h @@ -1,6 +1,6 @@ /* LZMA Compress Header - Copyright (c) 2012, Nikolaj Schlej. All rights reserved. + Copyright (c) 2014, Nikolaj Schlej. All rights reserved. This program and the accompanying materials are licensed and made available under the terms and conditions of the BSD License which accompanies this distribution. The full text of the license may be found at @@ -9,22 +9,33 @@ THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -*/ + */ -#ifndef __LZMACOMPRESS_H__ -#define __LZMACOMPRESS_H__ +#ifndef LZMACOMPRESS_H +#define LZMACOMPRESS_H -#include "SDK/C/Types.h" +#include "SDK/C/7zTypes.h" #include "BaseTypes.h" -#define LZMA_DICTIONARY_SIZE 0x800000 +#ifdef __cplusplus +extern "C" { +#endif -EFI_STATUS -LzmaCompress ( - IN UINT8 *Source, - IN SizeT SourceSize, - IN UINT8 *Destination, - IN OUT SizeT *DestinationSize - ); +#define DEFAULT_LZMA_DICTIONARY_SIZE 0x800000 +#define _LZMA_SIZE_OPT -#endif \ No newline at end of file + EFI_STATUS + EFIAPI + LzmaCompress ( + const UINT8 *Source, + UINT32 SourceSize, + UINT8 *Destination, + UINT32 *DestinationSize, + UINT32 DictionarySize + ); + +#ifdef __cplusplus +} +#endif + +#endif // LZMACOMPRESS_H diff --git a/uefi_firmware/compression/LZMA/LzmaDecompress.c b/uefi_firmware/compression/LZMA/LzmaDecompress.c index c422957..7e3a9ea 100644 --- a/uefi_firmware/compression/LZMA/LzmaDecompress.c +++ b/uefi_firmware/compression/LZMA/LzmaDecompress.c @@ -1,5 +1,3 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check it. -// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /* LZMA Decompress Implementation Copyright (c) 2009 - 2010, Intel Corporation. All rights reserved. @@ -14,28 +12,25 @@ WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. */ #include "LzmaDecompress.h" -#include "SDK/C/Types.h" -//#include "Sdk/C/7zVersion.h" -#include "SDK/C/LzmaDec.h" +#include "SDK/C/7zTypes.h" +#include "SDK/C/7zVersion.h" #include -#include UINT64 - LShiftU64 ( - UINT64 Operand, - UINT32 Count +EFIAPI +LShiftU64 ( + UINT64 Operand, + UINT32 Count ) { return Operand << Count; } -static void * AllocForLzma(void *p, size_t size) { return malloc(size); } -static void FreeForLzma(void *p, void *address) { free(address); } +static void * AllocForLzma(ISzAllocPtr p, size_t size) { (void)p; return malloc(size); } +static void FreeForLzma(ISzAllocPtr p, void *address) { (void)p; free(address); } static ISzAlloc SzAllocForLzma = { &AllocForLzma, &FreeForLzma }; -#define LZMA_HEADER_SIZE (LZMA_PROPS_SIZE + 8) - /* Get the size of the uncompressed buffer by parsing EncodeData header. @@ -44,14 +39,14 @@ Get the size of the uncompressed buffer by parsing EncodeData header. @return The size of the uncompressed buffer. */ UINT64 - GetDecodedSizeOfBuf( - const UINT8 *EncodedData +GetDecodedSizeOfBuf ( + UINT8 *EncodedData ) { UINT64 DecodedSize; - INT32 Index; + INT32 Index; - // Parse header + // Parse header DecodedSize = 0; for (Index = LZMA_PROPS_SIZE + 7; Index >= LZMA_PROPS_SIZE; Index--) DecodedSize = LShiftU64(DecodedSize, 8) + EncodedData[Index]; @@ -64,15 +59,15 @@ UINT64 // /* -Given a Lzma compressed source buffer, this function retrieves the size of -the uncompressed buffer and the size of the scratch buffer required +Given a Lzma compressed source buffer, this function retrieves the size of +the uncompressed buffer and the size of the scratch buffer required to decompress the compressed source buffer. -Retrieves the size of the uncompressed buffer and the temporary scratch buffer +Retrieves the size of the uncompressed buffer and the temporary scratch buffer required to decompress the buffer specified by Source and SourceSize. -The size of the uncompressed buffer is returned DestinationSize, +The size of the uncompressed buffer is returned DestinationSize, the size of the scratch buffer is returned ScratchSize, and RETURN_SUCCESS is returned. -This function does not have scratch buffer available to perform a thorough +This function does not have scratch buffer available to perform a thorough checking of the validity of the source data. It just retrieves the "Original Size" field from the LZMA_HEADER_SIZE beginning bytes of the source data and output it as DestinationSize. And ScratchSize is specific to the decompression implementation. @@ -85,35 +80,40 @@ If SourceSize is less than LZMA_HEADER_SIZE, then ASSERT(). that will be generated when the compressed buffer specified by Source and SourceSize is decompressed. -@retval EFI_SUCCESS The size of the uncompressed data was returned -DestinationSize and the size of the scratch +@retval EFI_SUCCESS The size of the uncompressed data was returned +DestinationSize and the size of the scratch buffer was returned ScratchSize. */ EFI_STATUS +EFIAPI LzmaGetInfo ( - IN VOID *Source, - IN size_t SourceSize, - OUT size_t *DestinationSize, - OUT size_t *ScratchSize - ) + CONST VOID *Source, + UINT32 SourceSize, + UINT32 *DestinationSize + ) { - UInt64 DecodedSize; - + UINT64 DecodedSize; ASSERT(SourceSize >= LZMA_HEADER_SIZE); + (void)SourceSize; DecodedSize = GetDecodedSizeOfBuf((UINT8*)Source); - *DestinationSize = DecodedSize; - return EFI_SUCCESS; + if (DecodedSize <= UINT32_MAX) { + *DestinationSize = (UINT32)DecodedSize; + return EFI_SUCCESS; + } + else { + return EFI_INVALID_PARAMETER; + } } /* Decompresses a Lzma compressed source buffer. Extracts decompressed data to its original form. -If the compressed source data specified by Source is successfully decompressed -into Destination, then RETURN_SUCCESS is returned. If the compressed source data +If the compressed source data specified by Source is successfully decompressed +into Destination, then RETURN_SUCCESS is returned. If the compressed source data specified by Source is not a valid compressed data format, then RETURN_INVALID_PARAMETER is returned. @@ -121,20 +121,18 @@ then RETURN_INVALID_PARAMETER is returned. @param SourceSize The size of source buffer. @param Destination The destination buffer to store the decompressed data -@retval EFI_SUCCESS Decompression completed successfully, and +@retval EFI_SUCCESS Decompression completed successfully, and the uncompressed buffer is returned Destination. -@retval EFI_INVALID_PARAMETER -The source buffer specified by Source is corrupted +@retval EFI_INVALID_PARAMETER +The source buffer specified by Source is corrupted (not a valid compressed format). */ EFI_STATUS - LzmaDecompress ( - IN VOID *Source, - IN size_t SourceSize, - IN OUT VOID *Destination, - IN size_t DstSize, - IN OUT VOID *Scratch, - IN size_t ScratchSize +EFIAPI +LzmaDecompress ( + CONST VOID *Source, + UINT32 SourceSize, + VOID *Destination ) { SRes LzmaResult; @@ -143,14 +141,14 @@ EFI_STATUS SizeT EncodedDataSize; DecodedBufSize = (SizeT)GetDecodedSizeOfBuf((UINT8*)Source); - EncodedDataSize = (SizeT) (SourceSize - LZMA_HEADER_SIZE); + EncodedDataSize = (SizeT)(SourceSize - LZMA_HEADER_SIZE); LzmaResult = LzmaDecode( - Destination, + (Byte*)Destination, &DecodedBufSize, (Byte*)((UINT8*)Source + LZMA_HEADER_SIZE), &EncodedDataSize, - Source, + (CONST Byte*) Source, LZMA_PROPS_SIZE, LZMA_FINISH_END, &Status, @@ -159,8 +157,8 @@ EFI_STATUS if (LzmaResult == SZ_OK) { return EFI_SUCCESS; - } else { + } + else { return EFI_INVALID_PARAMETER; } } - diff --git a/uefi_firmware/compression/LZMA/LzmaDecompress.h b/uefi_firmware/compression/LZMA/LzmaDecompress.h index a91dc0a..1ef79bd 100644 --- a/uefi_firmware/compression/LZMA/LzmaDecompress.h +++ b/uefi_firmware/compression/LZMA/LzmaDecompress.h @@ -9,82 +9,85 @@ THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. -*/ + */ -#ifndef __LZMADECOMPRESS_H__ -#define __LZMADECOMPRESS_H__ +#ifndef LZMADECOMPRESS_H +#define LZMADECOMPRESS_H #include "BaseTypes.h" +#include "SDK/C/LzmaDec.h" +#include "SDK/C/Bra.h" -UINT64 -LShiftU64 ( - UINT64 Operand, - UINT32 Count - ); - -/* - Given a Lzma compressed source buffer, this function retrieves the size of - the uncompressed buffer and the size of the scratch buffer required - to decompress the compressed source buffer. - - Retrieves the size of the uncompressed buffer and the temporary scratch buffer - required to decompress the buffer specified by Source and SourceSize. - The size of the uncompressed buffer is returned DestinationSize, - the size of the scratch buffer is returned ScratchSize, and RETURN_SUCCESS is returned. - This function does not have scratch buffer available to perform a thorough - checking of the validity of the source data. It just retrieves the "Original Size" - field from the LZMA_HEADER_SIZE beginning bytes of the source data and output it as DestinationSize. - And ScratchSize is specific to the decompression implementation. - - If SourceSize is less than LZMA_HEADER_SIZE, then ASSERT(). - - @param Source The source buffer containing the compressed data. - @param SourceSize The size, bytes, of the source buffer. - @param DestinationSize A pointer to the size, bytes, of the uncompressed buffer - that will be generated when the compressed buffer specified - by Source and SourceSize is decompressed. - - @retval EFI_SUCCESS The size of the uncompressed data was returned - DestinationSize and the size of the scratch - buffer was returned ScratchSize. - -*/ -EFI_STATUS -LzmaGetInfo ( - IN VOID *Source, - IN size_t SourceSize, - OUT size_t *DestinationSize, - OUT size_t *ScratchSize // UNUSED - ); - -/* - Decompresses a Lzma compressed source buffer. - - Extracts decompressed data to its original form. - If the compressed source data specified by Source is successfully decompressed - into Destination, then RETURN_SUCCESS is returned. If the compressed source data - specified by Source is not a valid compressed data format, - then RETURN_INVALID_PARAMETER is returned. - - @param Source The source buffer containing the compressed data. - @param SourceSize The size of source buffer. - @param Destination The destination buffer to store the decompressed data - - @retval EFI_SUCCESS Decompression completed successfully, and - the uncompressed buffer is returned Destination. - @retval EFI_INVALID_PARAMETER - The source buffer specified by Source is corrupted - (not a valid compressed format). -*/ -EFI_STATUS -LzmaDecompress ( - IN VOID *Source, - IN size_t SourceSize, - IN OUT VOID *Destination, - IN size_t DstSize, // UNUSED - IN OUT VOID *Scratch, // UNUSED - IN size_t SctachSize // UNUSED - ); +#ifdef __cplusplus +extern "C" { +#endif +#define LZMA_HEADER_SIZE (LZMA_PROPS_SIZE + 8) + + /* + Given a Lzma compressed source buffer, this function retrieves the size of + the uncompressed buffer and the size of the scratch buffer required + to decompress the compressed source buffer. + + Retrieves the size of the uncompressed buffer and the temporary scratch buffer + required to decompress the buffer specified by Source and SourceSize. + The size of the uncompressed buffer is returned DestinationSize, + the size of the scratch buffer is returned ScratchSize, and RETURN_SUCCESS is returned. + This function does not have scratch buffer available to perform a thorough + checking of the validity of the source data. It just retrieves the "Original Size" + field from the LZMA_HEADER_SIZE beginning bytes of the source data and output it as DestinationSize. + And ScratchSize is specific to the decompression implementation. + + If SourceSize is less than LZMA_HEADER_SIZE, then ASSERT(). + + @param Source The source buffer containing the compressed data. + @param SourceSize The size, bytes, of the source buffer. + @param DestinationSize A pointer to the size, bytes, of the uncompressed buffer + that will be generated when the compressed buffer specified + by Source and SourceSize is decompressed. + + @retval EFI_SUCCESS The size of the uncompressed data was returned + DestinationSize and the size of the scratch + buffer was returned ScratchSize. + + */ + EFI_STATUS + EFIAPI + LzmaGetInfo ( + CONST VOID *Source, + UINT32 SourceSize, + UINT32 *DestinationSize + ); + + /* + Decompresses a Lzma compressed source buffer. + + Extracts decompressed data to its original form. + If the compressed source data specified by Source is successfully decompressed + into Destination, then RETURN_SUCCESS is returned. If the compressed source data + specified by Source is not a valid compressed data format, + then RETURN_INVALID_PARAMETER is returned. + + @param Source The source buffer containing the compressed data. + @param SourceSize The size of source buffer. + @param Destination The destination buffer to store the decompressed data + + @retval EFI_SUCCESS Decompression completed successfully, and + the uncompressed buffer is returned Destination. + @retval EFI_INVALID_PARAMETER + The source buffer specified by Source is corrupted + (not a valid compressed format). + */ + EFI_STATUS + EFIAPI + LzmaDecompress ( + CONST VOID *Source, + UINT32 SourceSize, + VOID *Destination + ); + +#ifdef __cplusplus +} #endif +#endif // LZMADECOMPRESS_H diff --git a/uefi_firmware/compression/LZMA/SDK/C/7zTypes.h b/uefi_firmware/compression/LZMA/SDK/C/7zTypes.h new file mode 100644 index 0000000..f7d7071 --- /dev/null +++ b/uefi_firmware/compression/LZMA/SDK/C/7zTypes.h @@ -0,0 +1,529 @@ +/* 7zTypes.h -- Basic types +2022-04-01 : Igor Pavlov : Public domain */ + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#else +#include +#endif + +#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) + +#else // _WIN32 + +// #define ENV_HAVE_LSTAT +typedef int WRes; + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY__FACILITY_ERRNO 0x800 +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY__FACILITY__WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_PARAMETER EINVAL +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. +#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY__FACILITY__WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +#endif + + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +#ifndef RINOK_WRes +#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + +#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) + + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. + NOTES: Some code will work incorrectly in that case! */ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#define UINT64_CONST(n) n +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#define UINT64_CONST(n) n ## ULL +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + +typedef int BoolInt; +/* typedef BoolInt Bool; */ +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define MY_STD_CALL __stdcall +#else +#define MY_STD_CALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_FORCE_INLINE __forceinline + +#define MY_CDECL __cdecl +#define MY_FAST_CALL __fastcall + +#else // _MSC_VER + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) +#define MY_NO_INLINE __attribute__((noinline)) +// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define MY_NO_INLINE +#endif + +#define MY_FORCE_INLINE + + +#define MY_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define MY_FAST_CALL __attribute__((fastcall)) +// #define MY_FAST_CALL __attribute__((cdecl)) +#define MY_FAST_CALL +#elif defined(MY_CPU_AMD64) +// #define MY_FAST_CALL __attribute__((ms_abi)) +#define MY_FAST_CALL +#else +#define MY_FAST_CALL +#endif + +#endif // _MSC_VER + + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct IByteIn IByteIn; +struct IByteIn +{ + Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +typedef struct IByteOut IByteOut; +struct IByteOut +{ + void (*Write)(const IByteOut *p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +typedef struct ISeqInStream ISeqInStream; +struct ISeqInStream +{ + SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); + + +typedef struct ISeqOutStream ISeqOutStream; +struct ISeqOutStream +{ + size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +typedef struct ISeekInStream ISeekInStream; +struct ISeekInStream +{ + SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +typedef struct ILookInStream ILookInStream; +struct ILookInStream +{ + SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(const ILookInStream *p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); + + + +typedef struct +{ + ILookInStream vt; + const ISeekInStream *realStream; + + size_t pos; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +typedef struct ICompressProgress ICompressProgress; + +struct ICompressProgress +{ + SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. */ +}; +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef MY_container_of + +/* +#define MY_container_of(ptr, type, m) container_of(ptr, type, m) +#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) + +#endif + +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) + +/* +#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +*/ +#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) + +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) +*/ + + +#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +#define k_PropVar_TimePrec_0 0 +#define k_PropVar_TimePrec_Unix 1 +#define k_PropVar_TimePrec_DOS 2 +#define k_PropVar_TimePrec_HighPrec 3 +#define k_PropVar_TimePrec_Base 16 +#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7) +#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9) + +EXTERN_C_END + +#endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/7zVersion.h b/uefi_firmware/compression/LZMA/SDK/C/7zVersion.h index 9d99c5d..49ea81d 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/7zVersion.h +++ b/uefi_firmware/compression/LZMA/SDK/C/7zVersion.h @@ -1,7 +1,27 @@ -#define MY_VER_MAJOR 9 -#define MY_VER_MINOR 20 +#define MY_VER_MAJOR 22 +#define MY_VER_MINOR 01 #define MY_VER_BUILD 0 -#define MY_VERSION "9.20" -#define MY_DATE "2010-11-18" -#define MY_COPYRIGHT ": Igor Pavlov : Public domain" -#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE +#define MY_VERSION_NUMBERS "22.01" +#define MY_VERSION MY_VERSION_NUMBERS + +#ifdef MY_CPU_NAME + #define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")" +#else + #define MY_VERSION_CPU MY_VERSION +#endif + +#define MY_DATE "2022-07-15" +#undef MY_COPYRIGHT +#undef MY_VERSION_COPYRIGHT_DATE +#define MY_AUTHOR_NAME "Igor Pavlov" +#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov" + +#ifdef USE_COPYRIGHT_CR + #define MY_COPYRIGHT MY_COPYRIGHT_CR +#else + #define MY_COPYRIGHT MY_COPYRIGHT_PD +#endif + +#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE +#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE diff --git a/uefi_firmware/compression/LZMA/SDK/C/Bra.c b/uefi_firmware/compression/LZMA/SDK/C/Bra.c new file mode 100644 index 0000000..3b854d9 --- /dev/null +++ b/uefi_firmware/compression/LZMA/SDK/C/Bra.c @@ -0,0 +1,230 @@ +/* Bra.c -- Converters for RISC code +2021-02-09 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" +#include "Bra.h" + +SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + Byte *p; + const Byte *lim; + size &= ~(size_t)3; + ip += 4; + p = data; + lim = data + size; + + if (encoding) + + for (;;) + { + for (;;) + { + if (p >= lim) + return (SizeT)(p - data); + p += 4; + if (p[-1] == 0xEB) + break; + } + { + UInt32 v = GetUi32(p - 4); + v <<= 2; + v += ip + (UInt32)(p - data); + v >>= 2; + v &= 0x00FFFFFF; + v |= 0xEB000000; + SetUi32(p - 4, v); + } + } + + for (;;) + { + for (;;) + { + if (p >= lim) + return (SizeT)(p - data); + p += 4; + if (p[-1] == 0xEB) + break; + } + { + UInt32 v = GetUi32(p - 4); + v <<= 2; + v -= ip + (UInt32)(p - data); + v >>= 2; + v &= 0x00FFFFFF; + v |= 0xEB000000; + SetUi32(p - 4, v); + } + } +} + + +SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + Byte *p; + const Byte *lim; + size &= ~(size_t)1; + p = data; + lim = data + size - 4; + + if (encoding) + + for (;;) + { + UInt32 b1; + for (;;) + { + UInt32 b3; + if (p > lim) + return (SizeT)(p - data); + b1 = p[1]; + b3 = p[3]; + p += 2; + b1 ^= 8; + if ((b3 & b1) >= 0xF8) + break; + } + { + UInt32 v = + ((UInt32)b1 << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + + p += 2; + { + UInt32 cur = (ip + (UInt32)(p - data)) >> 1; + v += cur; + } + + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xF8 | (v >> 8)); + } + } + + for (;;) + { + UInt32 b1; + for (;;) + { + UInt32 b3; + if (p > lim) + return (SizeT)(p - data); + b1 = p[1]; + b3 = p[3]; + p += 2; + b1 ^= 8; + if ((b3 & b1) >= 0xF8) + break; + } + { + UInt32 v = + ((UInt32)b1 << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + + p += 2; + { + UInt32 cur = (ip + (UInt32)(p - data)) >> 1; + v -= cur; + } + + /* + SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000)); + SetUi16(p - 2, (UInt16)(v | 0xF800)); + */ + + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xF8 | (v >> 8)); + } + } +} + + +SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + Byte *p; + const Byte *lim; + size &= ~(size_t)3; + ip -= 4; + p = data; + lim = data + size; + + for (;;) + { + for (;;) + { + if (p >= lim) + return (SizeT)(p - data); + p += 4; + /* if ((v & 0xFC000003) == 0x48000001) */ + if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) + break; + } + { + UInt32 v = GetBe32(p - 4); + if (encoding) + v += ip + (UInt32)(p - data); + else + v -= ip + (UInt32)(p - data); + v &= 0x03FFFFFF; + v |= 0x48000000; + SetBe32(p - 4, v); + } + } +} + + +SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + Byte *p; + const Byte *lim; + size &= ~(size_t)3; + ip -= 4; + p = data; + lim = data + size; + + for (;;) + { + for (;;) + { + if (p >= lim) + return (SizeT)(p - data); + /* + v = GetBe32(p); + p += 4; + m = v + ((UInt32)5 << 29); + m ^= (UInt32)7 << 29; + m += (UInt32)1 << 22; + if ((m & ((UInt32)0x1FF << 23)) == 0) + break; + */ + p += 4; + if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) || + (p[-4] == 0x7F && (p[-3] >= 0xC0))) + break; + } + { + UInt32 v = GetBe32(p - 4); + v <<= 2; + if (encoding) + v += ip + (UInt32)(p - data); + else + v -= ip + (UInt32)(p - data); + + v &= 0x01FFFFFF; + v -= (UInt32)1 << 24; + v ^= 0xFF000000; + v >>= 2; + v |= 0x40000000; + SetBe32(p - 4, v); + } + } +} diff --git a/uefi_firmware/compression/LZMA/SDK/C/Bra.h b/uefi_firmware/compression/LZMA/SDK/C/Bra.h index 88caa33..855e37a 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/Bra.h +++ b/uefi_firmware/compression/LZMA/SDK/C/Bra.h @@ -1,14 +1,12 @@ /* Bra.h -- Branch converters for executables -2009-02-07 : Igor Pavlov : Public domain */ +2013-01-18 : Igor Pavlov : Public domain */ #ifndef __BRA_H #define __BRA_H -#include "Types.h" +#include "7zTypes.h" -#ifdef __cplusplus -extern "C" { -#endif +EXTERN_C_BEGIN /* These functions convert relative addresses to absolute addresses @@ -54,15 +52,13 @@ in CALL instructions to increase the compression ratio. */ #define x86_Convert_Init(state) { state = 0; } -SizeT x86_Convert(Byte *data, SizeT size, SizeT ip, SizeT *state, int encoding); -SizeT ARM_Convert(Byte *data, SizeT size, SizeT ip, int encoding); -SizeT ARMT_Convert(Byte *data, SizeT size, SizeT ip, int encoding); -SizeT PPC_Convert(Byte *data, SizeT size, SizeT ip, int encoding); -SizeT SPARC_Convert(Byte *data, SizeT size, SizeT ip, int encoding); -SizeT IA64_Convert(Byte *data, SizeT size, SizeT ip, int encoding); +SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding); +SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -#ifdef __cplusplus -} -#endif +EXTERN_C_END #endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/Bra86.c b/uefi_firmware/compression/LZMA/SDK/C/Bra86.c index b1ecdcf..10a0fbd 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/Bra86.c +++ b/uefi_firmware/compression/LZMA/SDK/C/Bra86.c @@ -1,87 +1,82 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check it. -// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /* Bra86.c -- Converter for x86 code (BCJ) -2008-10-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ -#include "Bra.h" +#include "Precomp.h" -#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) +#include "Bra.h" -const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0}; -const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3}; +#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0) -SizeT x86_Convert(Byte *data, SizeT size, SizeT ip, SizeT *state, int encoding) +SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) { - SizeT bufferPos = 0, prevPosT; - SizeT prevMask = *state & 0x7; + SizeT pos = 0; + UInt32 mask = *state & 7; if (size < 5) return 0; + size -= 4; ip += 5; - prevPosT = (SizeT)0 - 1; for (;;) { - Byte *p = data + bufferPos; - Byte *limit = data + size - 4; + Byte *p = data + pos; + const Byte *limit = data + size; for (; p < limit; p++) if ((*p & 0xFE) == 0xE8) break; - bufferPos = (SizeT)(p - data); - if (p >= limit) - break; - prevPosT = bufferPos - prevPosT; - if (prevPosT > 3) - prevMask = 0; - else + { - prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; - if (prevMask != 0) + SizeT d = (SizeT)(p - data) - pos; + pos = (SizeT)(p - data); + if (p >= limit) { - Byte b = p[4 - kMaskToBitNumber[prevMask]]; - if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) + *state = (d > 2 ? 0 : mask >> (unsigned)d); + return pos; + } + if (d > 2) + mask = 0; + else + { + mask >>= (unsigned)d; + if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1]))) { - prevPosT = bufferPos; - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; + mask = (mask >> 1) | 4; + pos++; continue; } } } - prevPosT = bufferPos; if (Test86MSByte(p[4])) { - SizeT src = ((SizeT)p[4] << 24) | ((SizeT)p[3] << 16) | ((SizeT)p[2] << 8) | ((SizeT)p[1]); - SizeT dest; - for (;;) + UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); + UInt32 cur = ip + (UInt32)pos; + pos += 5; + if (encoding) + v += cur; + else + v -= cur; + if (mask != 0) { - Byte b; - int index; - if (encoding) - dest = (ip + bufferPos) + src; - else - dest = src - (ip + bufferPos); - if (prevMask == 0) - break; - index = kMaskToBitNumber[prevMask] * 8; - b = (Byte)(dest >> (24 - index)); - if (!Test86MSByte(b)) - break; - src = dest ^ ((1LL << (32 - index)) - 1); + unsigned sh = (mask & 6) << 2; + if (Test86MSByte((Byte)(v >> sh))) + { + v ^= (((UInt32)0x100 << sh) - 1); + if (encoding) + v += cur; + else + v -= cur; + } + mask = 0; } - p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); - p[3] = (Byte)(dest >> 16); - p[2] = (Byte)(dest >> 8); - p[1] = (Byte)dest; - bufferPos += 5; + p[1] = (Byte)v; + p[2] = (Byte)(v >> 8); + p[3] = (Byte)(v >> 16); + p[4] = (Byte)(0 - ((v >> 24) & 1)); } else { - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; + mask = (mask >> 1) | 4; + pos++; } } - prevPosT = bufferPos - prevPosT; - *state = ((prevPosT > 3) ? 0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); - return bufferPos; } diff --git a/uefi_firmware/compression/LZMA/SDK/C/Compiler.h b/uefi_firmware/compression/LZMA/SDK/C/Compiler.h new file mode 100644 index 0000000..a9816fa --- /dev/null +++ b/uefi_firmware/compression/LZMA/SDK/C/Compiler.h @@ -0,0 +1,43 @@ +/* Compiler.h +2021-01-05 : Igor Pavlov : Public domain */ + +#ifndef __7Z_COMPILER_H +#define __7Z_COMPILER_H + + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wunused-private-field" + #endif + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + + #if _MSC_VER >= 1300 + #pragma warning(disable : 4996) // This function or variable may be unsafe + #else + #pragma warning(disable : 4511) // copy constructor could not be generated + #pragma warning(disable : 4512) // assignment operator could not be generated + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4702) // unreachable code + #pragma warning(disable : 4710) // not inlined + #pragma warning(disable : 4714) // function marked as __forceinline not inlined + #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information + #endif + + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" + // #pragma clang diagnostic ignored "-Wreserved-id-macro" + #endif + +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/CpuArch.c b/uefi_firmware/compression/LZMA/SDK/C/CpuArch.c new file mode 100644 index 0000000..1d4d28b --- /dev/null +++ b/uefi_firmware/compression/LZMA/SDK/C/CpuArch.c @@ -0,0 +1,490 @@ +/* CpuArch.c -- CPU specific code +2021-07-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + +#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) +#define USE_ASM +#endif + +#if !defined(USE_ASM) && _MSC_VER >= 1500 +#include +#endif + +#if defined(USE_ASM) && !defined(MY_CPU_AMD64) +static UInt32 CheckFlag(UInt32 flag) +{ + #ifdef _MSC_VER + __asm pushfd; + __asm pop EAX; + __asm mov EDX, EAX; + __asm xor EAX, flag; + __asm push EAX; + __asm popfd; + __asm pushfd; + __asm pop EAX; + __asm xor EAX, EDX; + __asm push EDX; + __asm popfd; + __asm and flag, EAX; + #else + __asm__ __volatile__ ( + "pushf\n\t" + "pop %%EAX\n\t" + "movl %%EAX,%%EDX\n\t" + "xorl %0,%%EAX\n\t" + "push %%EAX\n\t" + "popf\n\t" + "pushf\n\t" + "pop %%EAX\n\t" + "xorl %%EDX,%%EAX\n\t" + "push %%EDX\n\t" + "popf\n\t" + "andl %%EAX, %0\n\t": + "=c" (flag) : "c" (flag) : + "%eax", "%edx"); + #endif + return flag; +} +#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif + +#ifndef USE_ASM + #ifdef _MSC_VER + #if _MSC_VER >= 1600 + #define MY__cpuidex __cpuidex + #else + +/* + __cpuid (function == 4) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) doesn't clear ECX + We still can use __cpuid for low (function) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FAST_CALL / NO_INLINE function, + So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + + DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! +*/ + +static +MY_NO_INLINE +void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) +{ + UNUSED_VAR(subFunction); + __cpuid(CPUInfo, function); +} + + #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) + #pragma message("======== MY__cpuidex_HACK WAS USED ========") + #endif + #else + #define MY__cpuidex(info, func, func2) __cpuid(info, func) + #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") + #endif +#endif + + + + +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) +{ + #ifdef USE_ASM + + #ifdef _MSC_VER + + UInt32 a2, b2, c2, d2; + __asm xor EBX, EBX; + __asm xor ECX, ECX; + __asm xor EDX, EDX; + __asm mov EAX, function; + __asm cpuid; + __asm mov a2, EAX; + __asm mov b2, EBX; + __asm mov c2, ECX; + __asm mov d2, EDX; + + *a = a2; + *b = b2; + *c = c2; + *d = d2; + + #else + + __asm__ __volatile__ ( + #if defined(MY_CPU_AMD64) && defined(__PIC__) + "mov %%rbx, %%rdi;" + "cpuid;" + "xchg %%rbx, %%rdi;" + : "=a" (*a) , + "=D" (*b) , + #elif defined(MY_CPU_X86) && defined(__PIC__) + "mov %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=a" (*a) , + "=D" (*b) , + #else + "cpuid" + : "=a" (*a) , + "=b" (*b) , + #endif + "=c" (*c) , + "=d" (*d) + : "0" (function), "c"(0) ) ; + + #endif + + #else + + int CPUInfo[4]; + + MY__cpuidex(CPUInfo, (int)function, 0); + + *a = (UInt32)CPUInfo[0]; + *b = (UInt32)CPUInfo[1]; + *c = (UInt32)CPUInfo[2]; + *d = (UInt32)CPUInfo[3]; + + #endif +} + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +{ + CHECK_CPUID_IS_SUPPORTED + MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); + MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + return True; +} + +static const UInt32 kVendors[][3] = +{ + { 0x756E6547, 0x49656E69, 0x6C65746E}, + { 0x68747541, 0x69746E65, 0x444D4163}, + { 0x746E6543, 0x48727561, 0x736C7561} +}; + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] && + v[1] == p->vendor[1] && + v[2] == p->vendor[2]) + return (int)i; + } + return -1; +} + +BoolInt CPU_Is_InOrder() +{ + Cx86cpuid p; + int firm; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_GetFamily(p.ver); + model = x86cpuid_GetModel(p.ver); + + firm = x86cpuid_GetFirm(&p); + + switch (firm) + { + case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + /* In-Order Atom CPU */ + model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ + || model == 0x26 /* 45 nm, Z6xx */ + || model == 0x27 /* 32 nm, Z2460 */ + || model == 0x35 /* 32 nm, Z2760 */ + || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return True; +} + +#if !defined(MY_CPU_AMD64) && defined(_WIN32) +#include +static BoolInt CPU_Sys_Is_SSE_Supported() +{ + OSVERSIONINFO vi; + vi.dwOSVersionInfoSize = sizeof(vi); + if (!GetVersionEx(&vi)) + return False; + return (vi.dwMajorVersion >= 5); +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + + +static UInt32 X86_CPUID_ECX_Get_Flags() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return 0; + return p.c; +} + +BoolInt CPU_IsSupported_AES() +{ + return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3() +{ + return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41() +{ + return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return False; + + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + return (d[1] >> 29) & 1; + } +} + +// #include + +#ifdef _WIN32 +#include +#endif + +BoolInt CPU_IsSupported_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5); // avx2 + } +} + +BoolInt CPU_IsSupported_VAES_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB() +{ + Cx86cpuid cpuid; + if (!x86cpuid_CheckAndRead(&cpuid)) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); + if (d[0] < 0x80000001) + return False; + } + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); + return (d[3] >> 26) & 1; + } +} + + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include + +BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ + +static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + + /* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); + */ + +BoolInt CPU_IsSupported_CRC32(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#ifdef __linux__ +#include +#endif + +#ifdef __linux__ + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { uint32_t hwcaps = 0; elf_aux_info(AT_HWCAP, &hwcaps, sizeof(hwcaps)); return (hwcaps & (HWCAP_ ## name2)) ? 1 : 0; } +#endif + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) +#ifdef __linux__ + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } +#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { uint32_t hwcaps = 0; elf_aux_info(AT_HWCAP2, &hwcaps, sizeof(hwcaps)); return (hwcaps & (HWCAP2_ ## name)) ? 1 : 0; } +#endif + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return 0; } + MY_HWCAP_CHECK_FUNC(NEON) + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + int res = My_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + +#endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/CpuArch.h b/uefi_firmware/compression/LZMA/SDK/C/CpuArch.h index 01930c7..4856fbb 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/CpuArch.h +++ b/uefi_firmware/compression/LZMA/SDK/C/CpuArch.h @@ -1,77 +1,290 @@ /* CpuArch.h -- CPU specific code -2010-10-26: Igor Pavlov : Public domain */ +2022-07-15 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H -#include "Types.h" +#include "7zTypes.h" EXTERN_C_BEGIN /* MY_CPU_LE means that CPU is LITTLE ENDIAN. -If MY_CPU_LE is not defined, we don't know about that property of platform (it can be LITTLE ENDIAN). +MY_CPU_BE means that CPU is BIG ENDIAN. +If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. -If MY_CPU_LE_UNALIGN is not defined, we don't know about these properties of platform. + +MY_CPU_64BIT means that processor can work with 64-bit registers. + MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ -#if defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) -#define MY_CPU_AMD64 +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) + #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif -#if defined(MY_CPU_AMD64) || defined(_M_IA64) -#define MY_CPU_64BIT + +#if defined(_M_ARM64) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 + #define MY_CPU_NAME "arm64" + #define MY_CPU_64BIT #endif -#if defined(_M_IX86) || defined(__i386__) -#define MY_CPU_X86 + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT +#endif + + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#else + #define MY_CPU_NAME "ppc" + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif +#endif + + +#if defined(__riscv) \ + || defined(__riscv__) + #if __riscv_xlen == 32 + #define MY_CPU_NAME "riscv32" + #elif __riscv_xlen == 64 + #define MY_CPU_NAME "riscv64" + #else + #define MY_CPU_NAME "riscv" + #endif +#endif + + #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #define MY_CPU_X86_OR_AMD64 #endif -#if defined(MY_CPU_X86) || defined(_M_ARM) -#define MY_CPU_32BIT +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 #endif -#if defined(_WIN32) && defined(_M_ARM) -#define MY_CPU_ARM_LE + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + #endif -#if defined(_WIN32) && defined(_M_IA64) -#define MY_CPU_IA64_LE + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ + || defined(MY_CPU_IA64_LE) \ + || defined(__LITTLE_ENDIAN__) \ + || defined(__ARMEL__) \ + || defined(__THUMBEL__) \ + || defined(__AARCH64EL__) \ + || defined(__MIPSEL__) \ + || defined(__MIPSEL) \ + || defined(_MIPSEL) \ + || defined(__BFIN__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define MY_CPU_LE #endif -#if defined(MY_CPU_X86_OR_AMD64) -#define MY_CPU_LE_UNALIGN +#if defined(__BIG_ENDIAN__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEB__) \ + || defined(__AARCH64EB__) \ + || defined(__MIPSEB__) \ + || defined(__MIPSEB) \ + || defined(_MIPSEB) \ + || defined(__m68k__) \ + || defined(__s390__) \ + || defined(__s390x__) \ + || defined(__zarch__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define MY_CPU_BE #endif -#if defined(MY_CPU_X86_OR_AMD64) || defined(MY_CPU_ARM_LE) || defined(MY_CPU_IA64_LE) || defined(__ARMEL__) || defined(__MIPSEL__) || defined(__LITTLE_ENDIAN__) -#define MY_CPU_LE + +#if defined(MY_CPU_LE) && defined(MY_CPU_BE) + #error Stop_Compiling_Bad_Endian #endif -#if defined(__BIG_ENDIAN__) -#define MY_CPU_BE + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT #endif -#if defined(MY_CPU_LE) && defined(MY_CPU_BE) -Stop_Compiling_Bad_Endian +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + + +#ifndef MY_CPU_NAME + #ifdef MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + +#ifdef MY_CPU_LE + #if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM64) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) + /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. + So we can't use unaligned 64-bit operations. */ + #define MY_CPU_LE_UNALIGN + #endif #endif + #ifdef MY_CPU_LE_UNALIGN -#define GetUi16(p) (*(const UInt16 *)(p)) -#define GetUi32(p) (*(const UInt32 *)(p)) -#define GetUi64(p) (*(const UInt64 *)(p)) -#define SetUi16(p, d) *(UInt16 *)(p) = (d); -#define SetUi32(p, d) *(UInt32 *)(p) = (d); -#define SetUi64(p, d) *(UInt64 *)(p) = (d); +#define GetUi16(p) (*(const UInt16 *)(const void *)(p)) +#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 +#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#endif + +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } +#ifdef MY_CPU_LE_UNALIGN_64 +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif #else -#define GetUi16(p) (((const Byte *)(p))[0] | ((UInt16)((const Byte *)(p))[1] << 8)) +#define GetUi16(p) ( (UInt16) ( \ + ((const Byte *)(p))[0] | \ + ((UInt16)((const Byte *)(p))[1] << 8) )) #define GetUi32(p) ( \ ((const Byte *)(p))[0] | \ @@ -79,30 +292,63 @@ Stop_Compiling_Bad_Endian ((UInt32)((const Byte *)(p))[2] << 16) | \ ((UInt32)((const Byte *)(p))[3] << 24)) +#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); } + +#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); \ + _ppp_[2] = (Byte)(_vvv_ >> 16); \ + _ppp_[3] = (Byte)(_vvv_ >> 24); } + +#endif + + +#ifndef MY_CPU_LE_UNALIGN_64 + #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) -#define SetUi16(p, d) { UInt32 _x_ = (d); \ - ((Byte *)(p))[0] = (Byte)_x_; \ - ((Byte *)(p))[1] = (Byte)(_x_ >> 8); } +#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ + SetUi32(_ppp2_ , (UInt32)_vvv2_); \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } + +#endif + -#define SetUi32(p, d) { UInt32 _x_ = (d); \ - ((Byte *)(p))[0] = (Byte)_x_; \ - ((Byte *)(p))[1] = (Byte)(_x_ >> 8); \ - ((Byte *)(p))[2] = (Byte)(_x_ >> 16); \ - ((Byte *)(p))[3] = (Byte)(_x_ >> 24); } -#define SetUi64(p, d) { UInt64 _x64_ = (d); \ - SetUi32(p, (UInt32)_x64_); \ - SetUi32(((Byte *)(p)) + 4, (UInt32)(_x64_ >> 32)); } +#ifdef __has_builtin + #define MY__has_builtin(x) __has_builtin(x) +#else + #define MY__has_builtin(x) 0 #endif -#if defined(MY_CPU_LE_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) +#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) + +/* Note: we use bswap instruction, that is unsupported in 386 cpu */ + +#include +#pragma intrinsic(_byteswap_ushort) #pragma intrinsic(_byteswap_ulong) #pragma intrinsic(_byteswap_uint64) -#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) + +/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ +#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) + +#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) + +#elif defined(MY_CPU_LE_UNALIGN) && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) + +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ +#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) + +#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) #else @@ -114,9 +360,23 @@ Stop_Compiling_Bad_Endian #define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 24); \ + _ppp_[1] = (Byte)(_vvv_ >> 16); \ + _ppp_[2] = (Byte)(_vvv_ >> 8); \ + _ppp_[3] = (Byte)_vvv_; } + +#endif + + +#ifndef GetBe16 + +#define GetBe16(p) ( (UInt16) ( \ + ((UInt16)((const Byte *)(p))[0] << 8) | \ + ((const Byte *)(p))[1] )) + #endif -#define GetBe16(p) (((UInt16)((const Byte *)(p))[0] << 8) | ((const Byte *)(p))[1]) #ifdef MY_CPU_X86_OR_AMD64 @@ -138,16 +398,46 @@ enum CPU_FIRM_VIA }; -Bool x86cpuid_CheckAndRead(Cx86cpuid *p); +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d); + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p); int x86cpuid_GetFirm(const Cx86cpuid *p); -#define x86cpuid_GetFamily(p) (((p)->ver >> 8) & 0xFF00F) -#define x86cpuid_GetModel(p) (((p)->ver >> 4) & 0xF00F) -#define x86cpuid_GetStepping(p) ((p)->ver & 0xF) +#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF)) +#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) +#define x86cpuid_GetStepping(ver) (ver & 0xF) + +BoolInt CPU_Is_InOrder(void); + +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) -Bool CPU_Is_InOrder(); -Bool CPU_Is_Aes_Supported(); +BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif + +#endif +#if defined(__APPLE__) +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzFind.c b/uefi_firmware/compression/LZMA/SDK/C/LzFind.c index ede07d9..1b73c28 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzFind.c +++ b/uefi_firmware/compression/LZMA/SDK/C/LzFind.c @@ -1,83 +1,159 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check it. -// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /* LzFind.c -- Match finder for LZ algorithms -2009-04-22 : Igor Pavlov : Public doma*/ +2021-11-29 : Igor Pavlov : Public domain */ + +#include "Precomp.h" #include +// #include +#include "CpuArch.h" #include "LzFind.h" #include "LzHash.h" +#define kBlockMoveAlign (1 << 7) // alignment for memmove() +#define kBlockSizeAlign (1 << 16) // alignment for block allocation +#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary + #define kEmptyHashValue 0 -#define kMaxValForNormalize (0xFFFFFFFFFFFFFFFFLL) -#define kNormalizeStepMin (SizeT)(1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(kNormalizeStepMin - 1)) -#define kMaxHistorySize (3LL << 30) -#define kStartMaxLen 3 +#define kMaxValForNormalize ((UInt32)0) +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug + +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses + +#define GET_AVAIL_BYTES(p) \ + Inline_MatchFinder_GetNumAvailableBytes(p) + + +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) +#define kFix5HashSize kFix4HashSize + +/* + HASH2_CALC: + if (hv) match, then cur[0] and cur[1] also match +*/ +#define HASH2_CALC hv = GetUi16(cur); + +// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255] + +/* + HASH3_CALC: + if (cur[0]) and (h2) match, then cur[1] also match + if (cur[0]) and (hv) match, then cur[1] and cur[2] also match +*/ +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \ + /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \ + hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; } + +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; -static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc) + +static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { if (!p->directInput) { - alloc->Free(alloc, p->bufferBase); - p->bufferBase = 0; + ISzAlloc_Free(alloc, p->bufferBase); + p->bufferBase = NULL; } } -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ -static int LzInWindow_Create(CMatchFinder *p, SizeT keepSizeReserv, ISzAlloc *alloc) +static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc) { - SizeT blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) - { - p->blockSize = blockSize; - return 1; - } - if (p->bufferBase == 0 || p->blockSize != blockSize) + if (blockSize == 0) + return 0; + if (!p->bufferBase || p->blockSize != blockSize) { + // size_t blockSizeT; LzInWindow_Free(p, alloc); p->blockSize = blockSize; - p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize); + // blockSizeT = blockSize; + + // printf("\nblockSize = 0x%x\n", blockSize); + /* + #if defined _WIN64 + // we can allocate 4GiB, but still use UInt32 for (p->blockSize) + // we use UInt32 type for (p->blockSize), because + // we don't want to wrap over 4 GiB, + // when we use (p->streamPos - p->pos) that is UInt32. + if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign) + { + blockSizeT = ((size_t)1 << 32); + printf("\nchanged to blockSizeT = 4GiB\n"); + } + #endif + */ + + p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufferBase); + // return 0; // for debug } - return (p->bufferBase != 0); + return (p->bufferBase != NULL); } -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -Byte MatchFinder_GetIndexByte(const CMatchFinder *p, ptrdiff_t index) { return p->buffer[index]; } +static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -SizeT MatchFinder_GetNumAvailableBytes(const CMatchFinder *p) { return p->streamPos - p->pos; } +static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } -void MatchFinder_ReduceOffsets(CMatchFinder *p, SizeT subValue) -{ - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} +MY_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) return; + + /* We use (p->streamPos - p->pos) value. + (p->streamPos < p->pos) is allowed. */ + if (p->directInput) { - SizeT curSize = 0xFFFFFFFFFFFFFFFFLL - p->streamPos; + UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); if (curSize > p->directInputRem) - curSize = p->directInputRem; + curSize = (UInt32)p->directInputRem; p->directInputRem -= curSize; p->streamPos += curSize; if (p->directInputRem == 0) p->streamEndWasReached = 1; return; } + for (;;) { - Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); + Byte *dest = p->buffer + GET_AVAIL_BYTES(p); + size_t size = (size_t)(p->bufferBase + p->blockSize - dest); if (size == 0) + { + /* we call ReadBlock() after NeedMove() and MoveBlock(). + NeedMove() and MoveBlock() povide more than (keepSizeAfter) + to the end of (blockSize). + So we don't execute this branch in normal code flow. + We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock(). + */ + // p->result = SZ_ERROR_FAIL; // we can show error here return; - p->result = p->stream->Read(p->stream, dest, &size); + } + + // #define kRead 3 + // if (size > kRead) size = kRead; // for debug + + p->result = ISeqInStream_Read(p->stream, dest, &size); if (p->result != SZ_OK) return; if (size == 0) @@ -85,42 +161,53 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) p->streamEndWasReached = 1; return; } - p->streamPos += size; - if (p->streamPos - p->pos > p->keepSizeAfter) + p->streamPos += (UInt32)size; + if (GET_AVAIL_BYTES(p) > p->keepSizeAfter) return; + /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function + (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */ } + + // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter) } + + +MY_NO_INLINE void MatchFinder_MoveBlock(CMatchFinder *p) { + const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; + const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; + p->buffer = p->bufferBase + keepBefore; memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); - p->buffer = p->bufferBase + p->keepSizeBefore; + p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + keepBefore + (size_t)GET_AVAIL_BYTES(p)); } -int MatchFinder_NeedMove(const CMatchFinder *p) +/* We call MoveBlock() before ReadBlock(). + So MoveBlock() can be wasteful operation, if the whole input data + can fit in current block even without calling MoveBlock(). + in important case where (dataSize <= historySize) + condition (p->blockSize > dataSize + p->keepSizeAfter) is met + So there is no MoveBlock() in that case case. +*/ + +int MatchFinder_NeedMove(CMatchFinder *p) { if (p->directInput) return 0; - /* if (p->streamEndWasReached) return 0; */ + if (p->streamEndWasReached || p->result != SZ_OK) + return 0; return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) { - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) + if (p->keepSizeAfter >= GET_AVAIL_BYTES(p)) MatchFinder_ReadBlock(p); } -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) -{ - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} + static void MatchFinder_SetDefaultSettings(CMatchFinder *p) { @@ -134,243 +221,678 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) void MatchFinder_Construct(CMatchFinder *p) { - SizeT i; - p->bufferBase = 0; + unsigned i; + p->bufferBase = NULL; p->directInput = 0; - p->hash = 0; + p->hash = NULL; + p->expectedDataSize = (UInt64)(Int64)-1; MatchFinder_SetDefaultSettings(p); for (i = 0; i < 256; i++) { - SizeT r = i; - SizeT j; + UInt32 r = (UInt32)i; + unsigned j; for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1LL) - 1LL)); + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); p->crc[i] = r; } } -static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc) +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->hash); - p->hash = 0; + ISzAlloc_Free(alloc, p->hash); + p->hash = NULL; } -void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc) +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) { MatchFinder_FreeThisClassMemory(p, alloc); LzInWindow_Free(p, alloc); } -static CLzRef* AllocRefs(SizeT num, ISzAlloc *alloc) +static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) { size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / sizeof(CLzRef) != num) - return 0; - return (CLzRef *)alloc->Alloc(alloc, sizeInBytes); + return NULL; + return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); } -int MatchFinder_Create(CMatchFinder *p, SizeT historySize, - SizeT keepAddBufferBefore, SizeT matchMaxLen, SizeT keepAddBufferAfter, - ISzAlloc *alloc) +#if (kBlockSizeReserveMin < kBlockSizeAlign * 2) + #error Stop_Compiling_Bad_Reserve +#endif + + + +static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) { - SizeT sizeReserv; + UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter); + /* if (historySize > kMaxHistorySize) - { - MatchFinder_Free(p, alloc); return 0; + */ + // printf("\nhistorySize == 0x%x\n", historySize); + + if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow + return 0; + + { + const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign; + const UInt32 rem = kBlockSizeMax - blockSize; + const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)) + + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here + if (blockSize >= kBlockSizeMax + || rem < kBlockSizeReserveMin) // we reject settings that will be slow + return 0; + if (reserve >= rem) + blockSize = kBlockSizeMax; + else + { + blockSize += reserve; + blockSize &= ~(UInt32)(kBlockSizeAlign - 1); + } } - sizeReserv = historySize >> 1; - if (historySize > (2LL << 30)) - sizeReserv = historySize >> 2; - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + // printf("\n LzFind_blockSize = %x\n", blockSize); + // printf("\n LzFind_blockSize = %d\n", blockSize >> 20); + return blockSize; +} + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc) +{ + /* we need one additional byte in (p->keepSizeBefore), + since we use MoveBlock() after (p->pos++) and before dictionary using */ + // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - if (LzInWindow_Create(p, sizeReserv, alloc)) + + keepAddBufferAfter += matchMaxLen; + /* we need (p->keepSizeAfter >= p->numHashBytes) */ + if (keepAddBufferAfter < p->numHashBytes) + keepAddBufferAfter = p->numHashBytes; + // keepAddBufferAfter -= 2; // for debug + p->keepSizeAfter = keepAddBufferAfter; + + if (p->directInput) + p->blockSize = 0; + if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) { - SizeT newCyclicBufferSize = historySize + 1; - SizeT hs; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it + UInt32 hs; p->matchMaxLen = matchMaxLen; { + // UInt32 hs4; p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else + hs = (1 << 16) - 1; + if (p->numHashBytes != 2) { - hs = historySize - 1; + hs = historySize; + if (hs > p->expectedDataSize) + hs = (UInt32)p->expectedDataSize; + if (hs != 0) + hs--; hs |= (hs >> 1); hs |= (hs >> 2); hs |= (hs >> 4); hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) + if (hs >= (1 << 24)) { if (p->numHashBytes == 3) hs = (1 << 24) - 1; else hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ } + + // hs = ((UInt32)1 << 25) - 1; // for test + + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; } p->hashMask = hs; hs++; + + /* + hs4 = (1 << 20); + if (hs4 > hs) + hs4 = hs; + // hs4 = (1 << 16); // for test + p->hash4Mask = hs4 - 1; + */ + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; hs += p->fixedHashSize; } { - SizeT prevSize = p->hashSizeSum + p->numSons; - SizeT newSize; + size_t newSize; + size_t numSons; p->historySize = historySize; p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; - p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize); - newSize = p->hashSizeSum + p->numSons; - if (p->hash != 0 && prevSize == newSize) + p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) + + numSons = newCyclicBufferSize; + if (p->btMode) + numSons <<= 1; + newSize = hs + numSons; + + // aligned size is not required here, but it can be better for some loops + #define NUM_REFS_ALIGN_MASK 0xF + newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; + + if (p->hash && p->numRefs == newSize) return 1; + MatchFinder_FreeThisClassMemory(p, alloc); + p->numRefs = newSize; p->hash = AllocRefs(newSize, alloc); - if (p->hash != 0) + + if (p->hash) { p->son = p->hash + p->hashSizeSum; return 1; } } } + MatchFinder_Free(p, alloc); return 0; } + static void MatchFinder_SetLimits(CMatchFinder *p) { - SizeT limit = kMaxValForNormalize - p->pos; - SizeT limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - if (limit2 <= p->keepSizeAfter) - { - if (limit2 > 0) - limit2 = 1; - } - else - limit2 -= p->keepSizeAfter; - if (limit2 < limit) - limit = limit2; + UInt32 k; + UInt32 n = kMaxValForNormalize - p->pos; + if (n == 0) + n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0) + + k = p->cyclicBufferSize - p->cyclicBufferPos; + if (k < n) + n = k; + + k = GET_AVAIL_BYTES(p); { - SizeT lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; + const UInt32 ksa = p->keepSizeAfter; + UInt32 mm = p->matchMaxLen; + if (k > ksa) + k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock + else if (k >= mm) + { + // the limitation for (p->lenLimit) update + k -= mm; // optimization : to reduce the number of checks + k++; + // k = 1; // non-optimized version : for debug + } + else + { + mm = k; + if (k != 0) + k = 1; + } + p->lenLimit = mm; } - p->posLimit = p->pos + limit; + if (k < n) + n = k; + + p->posLimit = p->pos + n; } -void MatchFinder_Init(CMatchFinder *p) + +void MatchFinder_Init_LowHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash; + const size_t numItems = p->fixedHashSize; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_HighHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash + p->fixedHashSize; + const size_t numItems = (size_t)p->hashMask + 1; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_4(CMatchFinder *p) { - SizeT i; - for (i = 0; i < p->hashSizeSum; i++) - p->hash[i] = kEmptyHashValue; - p->cyclicBufferPos = 0; p->buffer = p->bufferBase; - p->pos = p->streamPos = p->cyclicBufferSize; + { + /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. + the code in CMatchFinderMt expects (pos = 1) */ + p->pos = + p->streamPos = + 1; // it's smallest optimal value. do not change it + // 0; // for debug + } p->result = SZ_OK; p->streamEndWasReached = 0; +} + + +// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug + +void MatchFinder_Init(CMatchFinder *p) +{ + MatchFinder_Init_HighHash(p); + MatchFinder_Init_LowHash(p); + MatchFinder_Init_4(p); + // if (readData) MatchFinder_ReadBlock(p); + + /* if we init (cyclicBufferPos = pos), then we can use one variable + instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */ + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos) + // p->cyclicBufferPos = 0; // smallest value + // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses. MatchFinder_SetLimits(p); } -static SizeT MatchFinder_GetSubValue(const CMatchFinder *p) + + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + #define USE_SATUR_SUB_128 + #define USE_AVX2 + #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1600) + #define USE_SATUR_SUB_128 + #if (_MSC_VER >= 1900) + #define USE_AVX2 + #include // avx + #endif + #endif + #endif + +// #elif defined(MY_CPU_ARM_OR_ARM64) +#elif defined(MY_CPU_ARM64) + + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_SATUR_SUB_128 + #ifdef MY_CPU_ARM64 + // #define ATTRIB_SSE41 __attribute__((__target__(""))) + #else + // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif + + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1910) + #define USE_SATUR_SUB_128 + #endif + #endif + + #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #include + #else + #include + #endif + +#endif + +/* +#ifndef ATTRIB_SSE41 + #define ATTRIB_SSE41 +#endif +#ifndef ATTRIB_AVX2 + #define ATTRIB_AVX2 +#endif +*/ + +#ifdef USE_SATUR_SUB_128 + +// #define _SHOW_HW_STATUS + +#ifdef _SHOW_HW_STATUS +#include +#define _PRF(x) x +_PRF(;) +#else +#define _PRF(x) +#endif + +#ifdef MY_CPU_ARM_OR_ARM64 + +#ifdef MY_CPU_ARM64 +// #define FORCE_SATUR_SUB_128 +#endif + +typedef uint32x4_t v128; +#define SASUB_128(i) \ + *(v128 *)(void *)(items + (i) * 4) = \ + vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); + +#else + +#include // sse4.1 + +typedef __m128i v128; +#define SASUB_128(i) \ + *(v128 *)(void *)(items + (i) * 4) = \ + _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 + +#endif + + + +MY_NO_INLINE +static +#ifdef ATTRIB_SSE41 +ATTRIB_SSE41 +#endif +void +MY_FAST_CALL +LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - return (p->pos - p->historySize - 1) & kNormalizeMask; + v128 sub2 = + #ifdef MY_CPU_ARM_OR_ARM64 + vdupq_n_u32(subValue); + #else + _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + #endif + do + { + SASUB_128(0) + SASUB_128(1) + SASUB_128(2) + SASUB_128(3) + items += 4 * 4; + } + while (items != lim); } -void MatchFinder_Normalize3(SizeT subValue, CLzRef *items, SizeT numItems) + + +#ifdef USE_AVX2 + +#include // avx + +#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 + +MY_NO_INLINE +static +#ifdef ATTRIB_AVX2 +ATTRIB_AVX2 +#endif +void +MY_FAST_CALL +LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - SizeT i; - for (i = 0; i < numItems; i++) + __m256i sub2 = _mm256_set_epi32( + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + do { - SizeT value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; + SASUB_256(0) + SASUB_256(1) + items += 2 * 8; } + while (items != lim); } +#endif // USE_AVX2 + +#ifndef FORCE_SATUR_SUB_128 +typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( + UInt32 subValue, CLzRef *items, const CLzRef *lim); +static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; +#endif // FORCE_SATUR_SUB_128 + +#endif // USE_SATUR_SUB_128 + + +// kEmptyHashValue must be zero +// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; +#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; + +#ifdef FORCE_SATUR_SUB_128 -static void MatchFinder_Normalize(CMatchFinder *p) +#define DEFAULT_SaturSub LzFind_SaturSub_128 + +#else + +#define DEFAULT_SaturSub LzFind_SaturSub_32 + +MY_NO_INLINE +static +void +MY_FAST_CALL +LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + do + { + UInt32 v; + SASUB_32(0) + SASUB_32(1) + SASUB_32(2) + SASUB_32(3) + SASUB_32(4) + SASUB_32(5) + SASUB_32(6) + SASUB_32(7) + items += 8; + } + while (items != lim); +} + +#endif + + +MY_NO_INLINE +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - SizeT subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); - MatchFinder_ReduceOffsets(p, subValue); + #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) + + CLzRef *lim; + + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) + { + UInt32 v; + SASUB_32(0); + items++; + } + + { + #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) + lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); + numItems &= K_NORM_ALIGN_MASK; + if (items != lim) + { + #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) + if (g_LzFind_SaturSub) + g_LzFind_SaturSub(subValue, items, lim); + else + #endif + DEFAULT_SaturSub(subValue, items, lim); + } + items = lim; + } + + + for (; numItems != 0; numItems--) + { + UInt32 v; + SASUB_32(0); + items++; + } } + + +// call MatchFinder_CheckLimits() only after (p->pos++) update + +MY_NO_INLINE static void MatchFinder_CheckLimits(CMatchFinder *p) { + if (// !p->streamEndWasReached && p->result == SZ_OK && + p->keepSizeAfter == GET_AVAIL_BYTES(p)) + { + // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p)) + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); + } + if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); + if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data. + /* + if we disable normalization for last bytes of data, and + if (data_size == 4 GiB), we don't call wastfull normalization, + but (pos) will be wrapped over Zero (0) in that case. + And we cannot resume later to normal operation + */ + { + // MatchFinder_Normalize(p); + /* after normalization we need (p->pos >= p->historySize + 1); */ + /* we can reduce subValue to aligned value, if want to keep alignment + of (p->pos) and (p->buffer) for speculated accesses. */ + const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; + // const UInt32 subValue = (1 << 15); // for debug + // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + Inline_MatchFinder_ReduceOffsets(p, subValue); + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + } + if (p->cyclicBufferPos == p->cyclicBufferSize) p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); } -static SizeT * Hc_GetMatchesSpec(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byte *cur, CLzRef *son, - SizeT _cyclicBufferPos, SizeT _cyclicBufferSize, SizeT cutValue, - SizeT *distances, SizeT maxLen) + +/* + (lenLimit > maxLen) +*/ +MY_FORCE_INLINE +static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, unsigned maxLen) { + /* son[_cyclicBufferPos] = curMatch; for (;;) { - SizeT delta = pos - curMatch; + UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; + return d; { const Byte *pb = cur - delta; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; if (pb[maxLen] == cur[maxLen] && *pb == *cur) { - SizeT len = 0; + UInt32 len = 0; while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; + maxLen = len; + *d++ = len; + *d++ = delta - 1; if (len == lenLimit) - return distances; + return d; } } } } -} + */ -SizeT * GetMatchesSpec1(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byte *cur, CLzRef *son, - SizeT _cyclicBufferPos, SizeT _cyclicBufferSize, SizeT cutValue, - SizeT *distances, SizeT maxLen) -{ - CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + (_cyclicBufferPos << 1); - SizeT len0 = 0, len1 = 0; - for (;;) + const Byte *lim = cur + lenLimit; + son[_cyclicBufferPos] = curMatch; + + do { - SizeT delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) + UInt32 delta; + + if (curMatch == 0) + break; + // if (curMatch2 >= curMatch) return NULL; + delta = pos - curMatch; + if (delta >= _cyclicBufferSize) + break; { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; + ptrdiff_t diff; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) + { + const Byte *c = cur; + while (*c == c[diff]) + { + if (++c == lim) + { + d[0] = (UInt32)(lim - cur); + d[1] = delta - 1; + return d + 2; + } + } + { + const unsigned len = (unsigned)(c - cur); + if (maxLen < len) + { + maxLen = len; + d[0] = (UInt32)len; + d[1] = delta - 1; + d += 2; + } + } + } } + } + while (--cutValue); + + return d; +} + + +MY_FORCE_INLINE +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, UInt32 maxLen) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + + UInt32 cmCheck; + + // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (cmCheck < curMatch) + do + { + const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; - SizeT len = (len0 < len1 ? len0 : len1); + unsigned len = (len0 < len1 ? len0 : len1); + const UInt32 pair0 = pair[0]; if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) @@ -379,52 +901,65 @@ SizeT * GetMatchesSpec1(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byte *c break; if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; + maxLen = (UInt32)len; + *d++ = (UInt32)len; + *d++ = delta - 1; if (len == lenLimit) { - *ptr1 = pair[0]; + *ptr1 = pair0; *ptr0 = pair[1]; - return distances; + return d; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; + // const UInt32 curMatch2 = pair[1]; + // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + // curMatch = curMatch2; + curMatch = pair[1]; ptr1 = pair + 1; - curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; + curMatch = pair[0]; ptr0 = pair; - curMatch = *ptr0; len0 = len; } } } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return d; } -static void SkipMatchesSpec(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byte *cur, CLzRef *son, - SizeT _cyclicBufferPos, SizeT _cyclicBufferSize, SizeT cutValue) + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { - CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + (_cyclicBufferPos << 1); - SizeT len0 = 0, len1 = 0; - for (;;) + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + + UInt32 cmCheck; + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (// curMatch >= pos || // failure + cmCheck < curMatch) + do { - SizeT delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } + const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; - SizeT len = (len0 < len1 ? len0 : len1); + unsigned len = (len0 < len1 ? len0 : len1); if (pb[len] == cur[len]) { while (++len != lenLimit) @@ -442,308 +977,594 @@ static void SkipMatchesSpec(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byt if (pb[len] < cur[len]) { *ptr1 = curMatch; + curMatch = pair[1]; ptr1 = pair + 1; - curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; + curMatch = pair[0]; ptr0 = pair; - curMatch = *ptr0; len0 = len; } } } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return; } + #define MOVE_POS \ ++p->cyclicBufferPos; \ p->buffer++; \ - if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } -#define MOVE_POS_RET MOVE_POS return offset; +#define MOVE_POS_RET MOVE_POS return distances; -static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } +MY_NO_INLINE +static void MatchFinder_MovePos(CMatchFinder *p) +{ + /* we go here at the end of stream data, when (avail < num_hash_bytes) + We don't update sons[cyclicBufferPos << btMode]. + So (sons) record will contain junk. And we cannot resume match searching + to normal operation, even if we will provide more input data in buffer. + p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue + if (p->btMode) + p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue + */ + MOVE_POS; +} #define GET_MATCHES_HEADER2(minLen, ret_op) \ - SizeT lenLimit; SizeT hashValue; const Byte *cur; SizeT curMatch; \ - lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ + lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ cur = p->buffer; -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) +#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); + +#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ + distances = func(MF_PARAMS(p), \ + distances, (UInt32)_maxLen_); MOVE_POS_RET; -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define GET_MATCHES_FOOTER_BT(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, maxLen) - distances; MOVE_POS_RET; +#define GET_MATCHES_FOOTER_HC(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) -#define SKIP_FOOTER \ - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; -static SizeT Bt2_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances) + +#define UPDATE_maxLen { \ + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ + const Byte *c = cur + maxLen; \ + const Byte *lim = cur + lenLimit; \ + for (; c != lim; c++) if (*(c + diff) != *c) break; \ + maxLen = (unsigned)(c - cur); } + +static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - SizeT offset; GET_MATCHES_HEADER(2) HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_BT(1) } -SizeT Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances) +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - SizeT offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_BT(2) } -static SizeT Bt3_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances) + +#define SET_mmm \ + mmm = p->cyclicBufferSize; \ + if (pos < mmm) \ + mmm = pos; + + +static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - SizeT hash2Value, delta2, maxLen, offset; + UInt32 mmm; + UInt32 h2, d2, pos; + unsigned maxLen; + UInt32 *hash; GET_MATCHES_HEADER(3) HASH3_CALC; - delta2 = p->pos - p->hash[hash2Value]; - curMatch = p->hash[kFix3HashSize + hashValue]; + hash = p->hash; + pos = p->pos; + + d2 = pos - hash[h2]; + + curMatch = (hash + kFix3HashSize)[hv]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; + hash[h2] = pos; + (hash + kFix3HashSize)[hv] = pos; + SET_mmm maxLen = 2; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + + if (d2 < mmm && *(cur - d2) == *cur) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[0] = maxLen; - distances[1] = delta2 - 1; - offset = 2; + UPDATE_maxLen + distances[0] = (UInt32)maxLen; + distances[1] = d2 - 1; + distances += 2; if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET; } } - GET_MATCHES_FOOTER(offset, maxLen) + + GET_MATCHES_FOOTER_BT(maxLen) } -static SizeT Bt4_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances) + +static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - SizeT hash2Value, hash3Value, delta2, delta3, maxLen, offset; + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; GET_MATCHES_HEADER(4) HASH4_CALC; - delta2 = p->pos - p->hash[ hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; + hash = p->hash; + pos = p->pos; - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + SET_mmm + + maxLen = 3; + + for (;;) { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET + } + break; } - if (offset != 0) + + GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + // d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + // (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + SET_mmm + + maxLen = 4; + + for (;;) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else break; - distances[offset - 2] = maxLen; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET; } + break; } - if (maxLen < 3) - maxLen = 3; - GET_MATCHES_FOOTER(offset, maxLen) + + GET_MATCHES_FOOTER_BT(maxLen) } -static SizeT Hc4_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances) + +static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - SizeT hash2Value, hash3Value, delta2, delta3, maxLen, offset; + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; GET_MATCHES_HEADER(4) HASH4_CALC; - delta2 = p->pos - p->hash[ hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) + SET_mmm + + maxLen = 3; + + for (;;) { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + break; } - if (offset != 0) + + GET_MATCHES_FOOTER_HC(maxLen); +} + + +static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + // d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + // (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + SET_mmm + + maxLen = 4; + + for (;;) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else break; - distances[offset - 2] = maxLen; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; + UPDATE_maxLen + distances[-2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 3) - maxLen = 3; - offset = Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances); - MOVE_POS_RET + + GET_MATCHES_FOOTER_HC(maxLen); } -SizeT Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances) + +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - SizeT offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances); - MOVE_POS_RET + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_HC(2) } -static void Bt2_MatchFinder_Skip(CMatchFinder *p, SizeT num) + +static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(2) { - SKIP_HEADER(2) HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; } - while (--num != 0); + SKIP_FOOTER } -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, SizeT num) +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(3) { - SKIP_HEADER(3) HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; } - while (--num != 0); + SKIP_FOOTER } -static void Bt3_MatchFinder_Skip(CMatchFinder *p, SizeT num) +static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(3) { - SizeT hash2Value; - SKIP_HEADER(3) + UInt32 h2; + UInt32 *hash; HASH3_CALC; - curMatch = p->hash[kFix3HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; - SKIP_FOOTER + hash = p->hash; + curMatch = (hash + kFix3HashSize)[hv]; + hash[h2] = + (hash + kFix3HashSize)[hv] = p->pos; } - while (--num != 0); + SKIP_FOOTER } -static void Bt4_MatchFinder_Skip(CMatchFinder *p, SizeT num) +static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(4) { - SizeT hash2Value, hash3Value; - SKIP_HEADER(4) + UInt32 h2, h3; + UInt32 *hash; HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = p->pos; - p->hash[kFix4HashSize + hashValue] = p->pos; - SKIP_FOOTER + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; } - while (--num != 0); + SKIP_FOOTER } -static void Hc4_MatchFinder_Skip(CMatchFinder *p, SizeT num) +static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(5) { - SizeT hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS + UInt32 h2, h3; + UInt32 *hash; + HASH5_CALC; + hash = p->hash; + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; } - while (--num != 0); + SKIP_FOOTER } -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, SizeT num) + +#define HC_SKIP_HEADER(minLen) \ + do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ + Byte *cur; \ + UInt32 *hash; \ + UInt32 *son; \ + UInt32 pos = p->pos; \ + UInt32 num2 = num; \ + /* (p->pos == p->posLimit) is not allowed here !!! */ \ + { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + num -= num2; \ + { const UInt32 cycPos = p->cyclicBufferPos; \ + son = p->son + cycPos; \ + p->cyclicBufferPos = cycPos + num2; } \ + cur = p->buffer; \ + hash = p->hash; \ + do { \ + UInt32 curMatch; \ + UInt32 hv; + + +#define HC_SKIP_FOOTER \ + cur++; pos++; *son++ = curMatch; \ + } while (--num2); \ + p->buffer = cur; \ + p->pos = pos; \ + if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ + }} while(num); \ + + +static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { - SKIP_HEADER(3) + HC_SKIP_HEADER(4) + + UInt32 h2, h3; + HASH4_CALC; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = pos; + + HC_SKIP_FOOTER +} + + +static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + HC_SKIP_HEADER(5) + + UInt32 h2, h3; + HASH5_CALC + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = pos; + + HC_SKIP_FOOTER +} + + +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + HC_SKIP_HEADER(3) + HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + curMatch = hash[hv]; + hash[hv] = pos; + + HC_SKIP_FOOTER } -void MatchFinder_CreateVTable(const CMatchFinder *p, IMatchFinder *vTable) + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + if (p->numHashBytes <= 4) + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + } } else if (p->numHashBytes == 2) { @@ -755,9 +1576,53 @@ void MatchFinder_CreateVTable(const CMatchFinder *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; } - else + else if (p->numHashBytes == 4) { vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + } +} + + + +void LzFindPrepare() +{ + #ifndef FORCE_SATUR_SUB_128 + #ifdef USE_SATUR_SUB_128 + LZFIND_SATUR_SUB_CODE_FUNC f = NULL; + #ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + // #pragma message ("=== LzFind NEON") + _PRF(printf("\n=== LzFind NEON\n")); + f = LzFind_SaturSub_128; + } + // f = 0; // for debug + } + #else // MY_CPU_ARM_OR_ARM64 + if (CPU_IsSupported_SSE41()) + { + // #pragma message ("=== LzFind SSE41") + _PRF(printf("\n=== LzFind SSE41\n")); + f = LzFind_SaturSub_128; + + #ifdef USE_AVX2 + if (CPU_IsSupported_AVX2()) + { + // #pragma message ("=== LzFind AVX2") + _PRF(printf("\n=== LzFind AVX2\n")); + f = LzFind_SaturSub_256; + } + #endif + } + #endif // MY_CPU_ARM_OR_ARM64 + g_LzFind_SaturSub = f; + #endif // USE_SATUR_SUB_128 + #endif // FORCE_SATUR_SUB_128 } diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzFind.h b/uefi_firmware/compression/LZMA/SDK/C/LzFind.h index 6bffa6d..eea873f 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzFind.h +++ b/uefi_firmware/compression/LZMA/SDK/C/LzFind.h @@ -1,62 +1,69 @@ /* LzFind.h -- Match finder for LZ algorithms -2009-04-22 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H -#include "Types.h" +#include "7zTypes.h" -#ifdef __cplusplus -extern "C" { -#endif +EXTERN_C_BEGIN -typedef SizeT CLzRef; +typedef UInt32 CLzRef; typedef struct _CMatchFinder { Byte *buffer; - SizeT pos; - SizeT posLimit; - SizeT streamPos; - SizeT lenLimit; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ - SizeT cyclicBufferPos; - SizeT cyclicBufferSize; /* it must be = (historySize + 1) */ + Byte streamEndWasReached; + Byte btMode; + Byte bigHash; + Byte directInput; - SizeT matchMaxLen; + UInt32 matchMaxLen; CLzRef *hash; CLzRef *son; - SizeT hashMask; - SizeT cutValue; + UInt32 hashMask; + UInt32 cutValue; Byte *bufferBase; ISeqInStream *stream; - ptrdiff_t streamEndWasReached; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; - SizeT blockSize; - SizeT keepSizeBefore; - SizeT keepSizeAfter; - - SizeT numHashBytes; - ptrdiff_t directInput; + UInt32 numHashBytes; size_t directInputRem; - ptrdiff_t btMode; - ptrdiff_t bigHash; - SizeT historySize; - SizeT fixedHashSize; - SizeT hashSizeSum; - SizeT numSons; + UInt32 historySize; + UInt32 fixedHashSize; + UInt32 hashSizeSum; SRes result; - SizeT crc[256]; + UInt32 crc[256]; + size_t numRefs; + + UInt64 expectedDataSize; } CMatchFinder; -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) -#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer) -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos)) -int MatchFinder_NeedMove(const CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +/* +#define Inline_MatchFinder_IsFinishedOK(p) \ + ((p)->streamEndWasReached \ + && (p)->streamPos == (p)->pos \ + && (!(p)->directInput || (p)->directInputRem == 0)) +*/ + +int MatchFinder_NeedMove(CMatchFinder *p); +/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */ void MatchFinder_MoveBlock(CMatchFinder *p); void MatchFinder_ReadIfRequired(CMatchFinder *p); @@ -66,16 +73,27 @@ void MatchFinder_Construct(CMatchFinder *p); historySize <= 3 GB keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB */ -int MatchFinder_Create(CMatchFinder *p, SizeT historySize, - SizeT keepAddBufferBefore, SizeT matchMaxLen, SizeT keepAddBufferAfter, - ISzAlloc *alloc); -void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc); -void MatchFinder_Normalize3(SizeT subValue, CLzRef *items, SizeT numItems); -void MatchFinder_ReduceOffsets(CMatchFinder *p, SizeT subValue); +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); -SizeT * GetMatchesSpec1(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byte *buffer, CLzRef *son, - SizeT _cyclicBufferPos, SizeT _cyclicBufferSize, SizeT _cutValue, - SizeT *distances, SizeT maxLen); +/* +#define Inline_MatchFinder_InitPos(p, val) \ + (p)->pos = (val); \ + (p)->streamPos = (val); +*/ + +#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ + (p)->pos -= (subValue); \ + (p)->streamPos -= (subValue); + + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *distances, UInt32 maxLen); /* Conditions: @@ -84,32 +102,35 @@ SizeT * GetMatchesSpec1(SizeT lenLimit, SizeT curMatch, SizeT pos, const Byte *b */ typedef void (*Mf_Init_Func)(void *object); -typedef Byte (*Mf_GetIndexByte_Func)(void *object, SizeT index); -typedef SizeT (*Mf_GetNumAvailableBytes_Func)(void *object); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); -typedef SizeT(*Mf_GetMatches_Func)(void *object, SizeT *distances); -typedef void (*Mf_Skip_Func)(void *object, SizeT); +typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef struct _IMatchFinder { Mf_Init_Func Init; - Mf_GetIndexByte_Func GetIndexByte; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; Mf_GetMatches_Func GetMatches; Mf_Skip_Func Skip; -} IMatchFinder; +} IMatchFinder2; -void MatchFinder_CreateVTable(const CMatchFinder *p, IMatchFinder *vTable); +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); +void MatchFinder_Init_LowHash(CMatchFinder *p); +void MatchFinder_Init_HighHash(CMatchFinder *p); +void MatchFinder_Init_4(CMatchFinder *p); void MatchFinder_Init(CMatchFinder *p); -SizeT Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances); -SizeT Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, SizeT *distances); -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, SizeT num); -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, SizeT num); -#ifdef __cplusplus -} -#endif +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); + +void LzFindPrepare(void); + +EXTERN_C_END #endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzHash.h b/uefi_firmware/compression/LZMA/SDK/C/LzHash.h index 6b0b1d3..77b898c 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzHash.h +++ b/uefi_firmware/compression/LZMA/SDK/C/LzHash.h @@ -1,54 +1,34 @@ /* LzHash.h -- HASH functions for LZ algorithms -2009-02-07 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZ_HASH_H #define __LZ_HASH_H +/* + (kHash2Size >= (1 << 8)) : Required + (kHash3Size >= (1 << 16)) : Required +*/ + #define kHash2Size (1 << 10) #define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) +// #define kHash4Size (1 << 20) #define kFix3HashSize (kHash2Size) #define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define HASH2_CALC hashValue = cur[0] | (cur[1] << 8); - -#define HASH3_CALC { \ - SizeT temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hashValue = (temp ^ ((size_t)cur[2] << 8)) & p->hashMask; } - -#define HASH4_CALC { \ - SizeT temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((size_t)cur[2] << 8)) & (kHash3Size - 1); \ - hashValue = (temp ^ ((size_t)cur[2] << 8) ^ ((size_t)p->crc[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - SizeT temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ (cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ (cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ - hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ - hash4Value &= (kHash4Size - 1); } - -/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hashValue = ((cur[2] | (cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - SizeT temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ (cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - SizeT temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ (cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ (cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +/* + We use up to 3 crc values for hash: + crc0 + crc1 << Shift_1 + crc2 << Shift_2 + (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff. + Small values for Shift are not good for collision rate. + Big value for Shift_2 increases the minimum size + of hash table, that will be slow for small files. +*/ + +#define kLzHash_CrcShift_1 5 +#define kLzHash_CrcShift_2 10 #endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.c b/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.c index cdb1250..d6742e5 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.c +++ b/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.c @@ -1,32 +1,42 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check it. -// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /* LzmaDec.c -- LZMA Decoder -2009-09-20 : Igor Pavlov : Public doma*/ +2021-04-01 : Igor Pavlov : Public domain */ -#include "LzmaDec.h" +#include "Precomp.h" #include +/* #include "CpuArch.h" */ +#include "LzmaDec.h" + #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 #define RC_INIT_SIZE 5 +#ifndef _LZMA_DEC_OPT + +#define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } -#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ { UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_1(p); i = (i + i) + 1; A1; } -#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) -#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i); A0; } else \ + { UPDATE_1(p + i); A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } @@ -46,9 +56,20 @@ i -= 0x40; } #endif -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) +#define MATCHED_LITER_DEC \ + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + +#endif // _LZMA_DEC_OPT + + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } -#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; #define UPDATE_1_CHECK range -= bound; code -= bound; #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ @@ -59,25 +80,28 @@ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK; i += m; m += m; } else \ + { UPDATE_1_CHECK; m += m; i += m; } + + #define kNumPosBitsMax 4 #define kNumPosStatesMax (1 << kNumPosBitsMax) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) #define kNumLenProbs (LenHigh + kLenNumHighSymbols) +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) #define kNumStates 12 +#define kNumStates2 16 #define kNumLitStates 7 #define kStartPosModelIndex 4 @@ -91,112 +115,211 @@ #define kAlignTableSize (1 << kNumAlignBits) #define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +/* External ASM code needs same CLzmaProb array layout. So don't change it. */ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) #define IsRepG0 (IsRep + kNumStates) #define IsRepG1 (IsRepG0 + kNumStates) #define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) - -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 768LL +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) -#define LzmaProps_GetNumProbs(p) (LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign +#endif -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS #endif + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) + #define LZMA_DIC_MIN (1 << 12) -/* First LZMA-symbol is always decoded. -And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is withlast normalization +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. + } + +Processing: + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. + Out: + RangeCoder is normalized Result: SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker - = kMatchSpecLenStart + 2 : State Init Marker + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined */ -static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) //-V2008 -{ - CLzmaProb *probs = p->probs; - SizeT state = p->state; - SizeT rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - SizeT pbMask = ((SizeT)1 << (p->prop.pb)) - (SizeT)1; - SizeT lpMask = ((SizeT)1 << (p->prop.lp)) - (SizeT)1; - SizeT lc = p->prop.lc; +#ifdef _LZMA_DEC_OPT + +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); Byte *dic = p->dic; SizeT dicBufSize = p->dicBufSize; SizeT dicPos = p->dicPos; - SizeT processedPos = p->processedPos; - SizeT checkDicSize = p->checkDicSize; - size_t len = 0; + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; const Byte *buf = p->buf; - SizeT range = p->range; - SizeT code = p->code; + UInt32 range = p->range; + UInt32 code = p->code; do { CLzmaProb *prob; - SizeT bound; - SizeT ttt; - SizeT posState = processedPos & pbMask; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0(prob) { - SizeT symbol; + unsigned symbol; UPDATE_0(prob); prob = probs + Literal; - if (checkDicSize != 0 || processedPos != 0) - prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + - (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + if (processedPos != 0 || checkDicSize != 0) + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); + processedPos++; if (state < kNumLitStates) { state -= (state < 4) ? state : 3; symbol = 1; - do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); + #ifdef _LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif } else { - SizeT matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - SizeT offs = 0x100; + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; state -= (state < 10) ? 3 : 6; symbol = 1; + #ifdef _LZMA_SIZE_OPT do { - ptrdiff_t bit; + unsigned bit; CLzmaProb *probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + MATCHED_LITER_DEC } while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif } + dic[dicPos++] = (Byte)symbol; - processedPos++; continue; } - else + { UPDATE_1(prob); prob = probs + IsRep + state; @@ -209,17 +332,22 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte else { UPDATE_1(prob); - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; prob = probs + IsRepG0 + state; IF_BIT_0(prob) { UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0(prob) { UPDATE_0(prob); - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; processedPos++; state = state < kNumLitStates ? 9 : 11; @@ -229,7 +357,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte } else { - SizeT distance; + UInt32 distance; UPDATE_1(prob); prob = probs + IsRepG1 + state; IF_BIT_0(prob) @@ -260,15 +388,17 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte state = state < kNumLitStates ? 8 : 11; prob = probs + RepLenCoder; } + + #ifdef _LZMA_SIZE_OPT { - unsigned limit_local, offset; + unsigned lim, offset; CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; offset = 0; - limit_local = (1 << kLenNumLowBits); + lim = (1 << kLenNumLowBits); } else { @@ -277,46 +407,82 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; - limit_local = (1 << kLenNumMidBits); + lim = (1 << kLenNumLowBits); } else { UPDATE_1(probLen); probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit_local = (1 << kLenNumHighBits); + offset = kLenNumLowSymbols * 2; + lim = (1 << kLenNumHighBits); } } - TREE_DECODE(probLen, limit_local, len); + TREE_DECODE(probLen, lim, len); len += offset; } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE; + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + len -= 8; + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + len += kLenNumLowSymbols * 2; + } + } + } + #endif if (state >= kNumStates) { - SizeT distance; + UInt32 distance; prob = probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); TREE_6_DECODE(prob, distance); if (distance >= kStartPosModelIndex) { - SizeT posSlot = distance; - ptrdiff_t numDirectBits = ((distance >> 1) - 1); + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); distance = (2 | (distance & 1)); if (posSlot < kEndPosModelIndex) { distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; + prob = probs + SpecPos; { - SizeT mask = 1; - SizeT i = 1; + UInt32 m = 1; + distance++; do { - GET_BIT2(prob + i, i, ; , distance |= mask); - mask <<= 1; + REV_BIT_VAR(prob, distance, m); } - while (--numDirectBits != 0); + while (--numDirectBits); + distance -= m; } } else @@ -328,9 +494,9 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte range >>= 1; { - SizeT t; + UInt32 t; code -= range; - t = (0 - (code >> 63)); /* (UInt32)((Int32)code >> 31) */ + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ distance = (distance << 1) + (t + 1); code += range & t; } @@ -343,56 +509,70 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte } */ } - while (--numDirectBits != 0); + while (--numDirectBits); prob = probs + Align; distance <<= kNumAlignBits; { unsigned i = 1; - GET_BIT2(prob + i, i, ; , distance |= 1); - GET_BIT2(prob + i, i, ; , distance |= 2); - GET_BIT2(prob + i, i, ; , distance |= 4); - GET_BIT2(prob + i, i, ; , distance |= 8); + REV_BIT_CONST(prob, i, 1); + REV_BIT_CONST(prob, i, 2); + REV_BIT_CONST(prob, i, 4); + REV_BIT_LAST (prob, i, 8); + distance |= i; } - if (distance == 0xFFFFFFFFLL) + if (distance == (UInt32)0xFFFFFFFF) { - len += kMatchSpecLenStart; + len = kMatchSpecLenStart; state -= kNumStates; break; } } } + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance + 1; - if (checkDicSize == 0) + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) { - if (distance >= processedPos) - return SZ_ERROR_DATA; + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; } - else if (distance >= checkDicSize) - return SZ_ERROR_DATA; - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; } len += kMatchMinLen; - if (limit == dicPos) - return SZ_ERROR_DATA; { - SizeT rem = limit - dicPos; - SizeT curLen = ((rem < len) ? rem : len); - SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); - processedPos += curLen; + processedPos += (UInt32)curLen; len -= curLen; - if (pos + curLen <= dicBufSize) + if (curLen <= dicBufSize - pos) { Byte *dest = dic + dicPos; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; const Byte *lim = dest + curLen; - dicPos += curLen; + dicPos += (SizeT)curLen; do *(dest) = (Byte)*(dest + src); while (++dest != lim); @@ -411,128 +591,174 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte } } while (dicPos < limit && buf < bufLimit); + NORMALIZE; + p->buf = buf; p->range = range; p->code = code; - p->remainLen = len; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. p->dicPos = dicPos; p->processedPos = processedPos; p->reps[0] = rep0; p->reps[1] = rep1; p->reps[2] = rep2; p->reps[3] = rep3; - p->state = state; - + p->state = (UInt32)state; + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; return SZ_OK; } +#endif + + static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; { - Byte *dic = p->dic; SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - SizeT len = p->remainLen; - SizeT rep0 = p->reps[0]; - if (limit - dicPos < len) - len = limit - dicPos; + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) p->checkDicSize = p->prop.dicSize; - p->processedPos += len; - p->remainLen -= len; - while (len-- != 0) + p->processedPos += (UInt32)len; + p->remainLen -= (UInt32)len; + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do { - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; } + while (--len); p->dicPos = dicPos; } } + +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). + +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { - do + if (p->checkDicSize == 0) { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - SizeT rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - } - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); - if (p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - LzmaDec_WriteRem(p, limit); + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - if (p->remainLen > kMatchSpecLenStart) { - p->remainLen = kMatchSpecLenStart; + int res = LZMA_DECODE_REAL(p, limit, bufLimit); + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + return res; } - return 0; } + + typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_INPUT_EOF, /* need more input data */ DUMMY_LIT, DUMMY_MATCH, DUMMY_REP } ELzmaDummy; -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) //-V2008 + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) { - SizeT range = p->range; - SizeT code = p->code; - const Byte *bufLimit = buf + inSize; - CLzmaProb *probs = p->probs; - SizeT state = p->state; + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = *bufOut; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; ELzmaDummy res; + for (;;) { - CLzmaProb *prob; - SizeT bound; - SizeT ttt; - SizeT posState = (p->processedPos) & (((SizeT)1 << p->prop.pb) - (SizeT)1); + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - prob = probs + Literal; if (p->checkDicSize != 0 || p->processedPos != 0) - prob += (LZMA_LIT_SIZE * - ((((p->processedPos) & (((SizeT)1 << (p->prop.lp)) - (SizeT)1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); if (state < kNumLitStates) { - SizeT symbol = 1; + unsigned symbol = 1; do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); } else { - SizeT matchByte = p->dic[p->dicPos - p->reps[0] + - ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; - SizeT offs = 0x100; - SizeT symbol = 1; + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; do { - SizeT bit; - CLzmaProb *probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + unsigned bit; + const CLzmaProb *probLit; + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) } while (symbol < 0x100); } @@ -540,7 +766,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS } else { - SizeT len; + unsigned len; UPDATE_1_CHECK; prob = probs + IsRep + state; @@ -559,12 +785,11 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; + break; } else { @@ -597,12 +822,12 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS prob = probs + RepLenCoder; } { - SizeT limit, offset; - CLzmaProb *probLen = prob + LenChoice; + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; offset = 0; limit = 1 << kLenNumLowBits; } @@ -613,15 +838,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; + limit = 1 << kLenNumLowBits; } else { UPDATE_1_CHECK; probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; + offset = kLenNumLowSymbols * 2; limit = 1 << kLenNumHighBits; } } @@ -631,20 +856,18 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS if (state < 4) { - ptrdiff_t posSlot; + unsigned posSlot; prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - (SizeT)1) << + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1LL << kNumPosSlotBits, posSlot); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); if (posSlot >= kStartPosModelIndex) { - SizeT numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + unsigned numDirectBits = ((posSlot >> 1) - 1); if (posSlot < kEndPosModelIndex) { - prob = probs + SpecPos + (((SizeT)2 | (posSlot & (SizeT)1)) << numDirectBits) - posSlot - (SizeT)1; + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); } else { @@ -656,48 +879,44 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS code -= range & (((code - range) >> 31) - 1); /* if (code >= range) code -= range; */ } - while (--numDirectBits != 0); + while (--numDirectBits); prob = probs + Align; numDirectBits = kNumAlignBits; } { - SizeT i = 1; + unsigned i = 1; + unsigned m = 1; do { - GET_BIT_CHECK(prob + i, i); + REV_BIT_CHECK(prob, i, m); } - while (--numDirectBits != 0); + while (--numDirectBits); } } } } + break; } NORMALIZE_CHECK; - return res; -} - -static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data) -{ - p->code = (data[1] << 24) | (data[2] << 16) | (data[3] << 8) | (data[4]) & 0xFFFFFFFFLL; - p->range = 0xFFFFFFFFLL; - p->needFlush = 0; + *bufOut = buf; + return res; } -void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) { - p->needFlush = 1; - p->remainLen = 0; + p->remainLen = kMatchSpecLenStart + 1; p->tempBufSize = 0; if (initDic) { p->processedPos = 0; p->checkDicSize = 0; - p->needInitState = 1; + p->remainLen = kMatchSpecLenStart + 2; } if (initState) - p->needInitState = 1; + p->remainLen = kMatchSpecLenStart + 2; } void LzmaDec_Init(CLzmaDec *p) @@ -706,48 +925,96 @@ void LzmaDec_Init(CLzmaDec *p) LzmaDec_InitDicAndState(p, True, True); } -static void LzmaDec_InitStateReal(CLzmaDec *p) -{ - SizeT numProbs = Literal + (LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); - SizeT i; - CLzmaProb *probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} + +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. +When the decoder reaches dicLimit, it looks (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead + if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position + +When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is noit + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN__NOT_FINISHED__FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); - *status = LZMA_STATUS_NOT_SPECIFIED; - while (p->remainLen != kMatchSpecLenStart) + if (p->remainLen > kMatchSpecLenStart) { - int checkEndMarkNow; + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA; + + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); - if (p->needFlush != 0) - { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) - { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; - LzmaDec_InitRc(p, p->tempBuf); - p->tempBufSize = 0; - } + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } + + p->remainLen = 0; + } + + for (;;) + { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + + int checkEndMarkNow = 0; - checkEndMarkNow = 0; if (p->dicPos >= dicLimit) { if (p->remainLen == 0 && p->code == 0) @@ -762,83 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + RETURN__NOT_FINISHED__FOR_FINISH; } checkEndMarkNow = 1; } - if (p->needInitState) - LzmaDec_InitStateReal(p); - + // (p->remainLen == 0) + if (p->tempBufSize == 0) { - SizeT processed; const Byte *bufLimit; + int dummyProcessed = -1; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = inSize; + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; (*srcLen) += inSize; + p->tempBufSize = (unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + unsigned i; + (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; } + bufLimit = src; + // we will decode only one iteration } else bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; + + { + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) + { + if (processed > inSize) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + continue; } - else + { - SizeT rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); - if (dummyRes == DUMMY_ERROR) + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) { - (*srcLen) += lookAhead; + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; } } + p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - lookAhead -= (rem + p->buf - p->tempBuf); - (*srcLen) += lookAhead; - src += lookAhead; - inSize -= lookAhead; - p->tempBufSize = 0; + + { + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } } + } } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; } + + SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; @@ -879,19 +1237,19 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *sr } } -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->probs); - p->probs = 0; + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; } -static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->dic); - p->dic = 0; + ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; } -void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) { LzmaDec_FreeProbs(p, alloc); LzmaDec_FreeDict(p, alloc); @@ -899,13 +1257,13 @@ void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) { - SizeT dicSize; + UInt32 dicSize; Byte d; if (size < LZMA_PROPS_SIZE) return SZ_ERROR_UNSUPPORTED; else - dicSize = (SizeT)data[1] | ((SizeT)data[2] << 8) | ((SizeT)data[3] << 16) | ((SizeT)data[4] << 24); + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); if (dicSize < LZMA_DIC_MIN) dicSize = LZMA_DIC_MIN; @@ -915,29 +1273,30 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) if (d >= (9 * 5 * 5)) return SZ_ERROR_UNSUPPORTED; - p->lc = d % 9; + p->lc = (Byte)(d % 9); d /= 9; - p->pb = d / 5; - p->lp = d % 5; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); return SZ_OK; } -static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) { - SizeT numProbs = LzmaProps_GetNumProbs(propNew); - if (p->probs == 0 || numProbs != p->numProbs) + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) { LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; - if (p->probs == 0) + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; } return SZ_OK; } -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); @@ -946,18 +1305,28 @@ SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, I return SZ_OK; } -SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; SizeT dicBufSize; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - dicBufSize = propNew.dicSize; - if (p->dic == 0 || dicBufSize != p->dicBufSize) + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) { LzmaDec_FreeDict(p, alloc); - p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); - if (p->dic == 0) + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) { LzmaDec_FreeProbs(p, alloc); return SZ_ERROR_MEM; @@ -970,32 +1339,25 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAlloc *alloc) + ELzmaStatus *status, ISzAllocPtr alloc) { CLzmaDec p; SRes res; - SizeT inSize = *srcLen; - SizeT outSize = *destLen; - *srcLen = *destLen = 0; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; if (inSize < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); - if (res != 0) - return res; + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); p.dic = dest; p.dicBufSize = outSize; - LzmaDec_Init(&p); - *srcLen = inSize; res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - + *destLen = p.dicPos; if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) res = SZ_ERROR_INPUT_EOF; - - (*destLen) = p.dicPos; LzmaDec_FreeProbs(&p, alloc); return res; } diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.h b/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.h index 763f873..6f12962 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.h +++ b/uefi_firmware/compression/LZMA/SDK/C/LzmaDec.h @@ -1,24 +1,24 @@ /* LzmaDec.h -- LZMA Decoder -2009-02-07 : Igor Pavlov : Public domain */ +2020-03-19 : Igor Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H -#include "Types.h" +#include "7zTypes.h" -#ifdef __cplusplus -extern "C" { -#endif +EXTERN_C_BEGIN /* #define _LZMA_PROB32 */ /* _LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ +typedef #ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; /* ---------- LZMA Properties ---------- */ @@ -27,8 +27,11 @@ extern "C" { typedef struct _CLzmaProps { - SizeT lc, lp, pb; - SizeT dicSize; + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; + UInt32 dicSize; } CLzmaProps; /* LzmaProps_Decode - decodes properties @@ -49,32 +52,34 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); typedef struct { + /* Don't change this structure. ASM code can use it. */ CLzmaProps prop; CLzmaProb *probs; + CLzmaProb *probs_1664; Byte *dic; - const Byte *buf; - SizeT range, code; - SizeT dicPos; SizeT dicBufSize; - SizeT processedPos; - SizeT checkDicSize; - SizeT state; - SizeT reps[4]; - SizeT remainLen; - int needFlush; - int needInitState; - SizeT numProbs; - SizeT tempBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; + UInt32 processedPos; + UInt32 checkDicSize; + UInt32 reps[4]; + UInt32 state; + UInt32 remainLen; + + UInt32 numProbs; + unsigned tempBufSize; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } +#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } void LzmaDec_Init(CLzmaDec *p); /* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ typedef enum { @@ -131,11 +136,11 @@ LzmaDec_Allocate* can return: SZ_ERROR_UNSUPPORTED - Unsupported properties */ -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc); -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); -SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); -void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); /* ---------- Dictionary Interface ---------- */ @@ -144,7 +149,7 @@ void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); You must work with CLzmaDec variables directly in this interface. STEPS: - LzmaDec_Constr() + LzmaDec_Construct() LzmaDec_Allocate() for (each new stream) { @@ -176,6 +181,7 @@ void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); LZMA_STATUS_NEEDS_MORE_INPUT LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, @@ -218,14 +224,13 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAlloc *alloc); + ELzmaStatus *status, ISzAllocPtr alloc); -#ifdef __cplusplus -} -#endif +EXTERN_C_END #endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.c b/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.c index 0e27303..c8b31a1 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.c +++ b/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.c @@ -1,7 +1,7 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check it. -// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /* LzmaEnc.c -- LZMA Encoder -2010-04-16 : Igor Pavlov : Public doma*/ +2022-07-15: Igor Pavlov : Public domain */ + +#include "Precomp.h" #include @@ -12,6 +12,7 @@ #include #endif +#include "CpuArch.h" #include "LzmaEnc.h" #include "LzFind.h" @@ -19,38 +20,48 @@ #include "LzFindMt.h" #endif -#ifdef SHOW_STAT -static int ttt = 0; -#endif +/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ -#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); +void LzmaEnc_Finish(CLzmaEncHandle pp); +void LzmaEnc_SaveState(CLzmaEncHandle pp); +void LzmaEnc_RestoreState(CLzmaEncHandle pp); -#define kBlockSize (9 << 10) -#define kUnpackBlockSize (1 << 18) -#define kMatchArraySize (1 << 21) -#define kLzmaMaxHistorySize ((SizeT)3 << 29) -#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) +#ifdef SHOW_STAT +static unsigned g_STAT_OFFSET = 0; +#endif -#define kNumMaxDirectBits (31) +/* for good normalization speed we still reserve 256 MB before 4 GB range */ +#define kLzmaMaxHistorySize ((UInt32)15 << 28) #define kNumTopBits 24 -#define kTopValue ((SizeT)1 << kNumTopBits) +#define kTopValue ((UInt32)1 << kNumTopBits) -#define kNumBitModelTotalBits 11LL +#define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define kProbInitValue (kBitModelTotal >> 1) #define kNumMoveReducingBits 4 #define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) +// #define kBitPrice (1 << kNumBitPriceShiftBits) + +#define REP_LEN_COUNT 64 void LzmaEncProps_Init(CLzmaEncProps *p) { p->level = 5; p->dictSize = p->mc = 0; + p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->writeEndMark = 0; + p->affinity = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -58,130 +69,236 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) int level = p->level; if (level < 0) level = 5; p->level = level; - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); + + if (p->dictSize == 0) + p->dictSize = + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) + ))); + + if (p->dictSize > p->reduceSize) + { + UInt32 v = (UInt32)p->reduceSize; + const UInt32 kReduceMin = ((UInt32)1 << 12); + if (v < kReduceMin) + v = kReduceMin; + if (p->dictSize > v) + p->dictSize = v; + } + if (p->lc < 0) p->lc = 3; if (p->lp < 0) p->lp = 0; if (p->pb < 0) p->pb = 2; + if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); + if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numThreads < 0) p->numThreads = -#ifndef _7ZIP_ST - ((p->btMode && p->algo) ? 2 : 1); -#else - 1; -#endif + #ifndef _7ZIP_ST + ((p->btMode && p->algo) ? 2 : 1); + #else + 1; + #endif } -SizeT LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) { CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); return props.dictSize; } -/* #define LZMA_LOG_BSR */ -/* Define it for Intel's CPU */ +/* +x86/x64: + +BSR: + IF (SRC == 0) ZF = 1, DEST is undefined; + AMD : DEST is unchanged; + IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit + BSR is slow in some processors + +LZCNT: + IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64) + IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits + IF (DEST == 0) ZF = 1; + +LZCNT works only in new processors starting from Haswell. +if LZCNT is not supported by processor, then it's executed as BSR. +LZCNT can be faster than BSR, if supported. +*/ + +// #define LZMA_LOG_BSR + +#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */ + + #if (defined(__clang__) && (__clang_major__ >= 6)) \ + || (defined(__GNUC__) && (__GNUC__ >= 6)) + #define LZMA_LOG_BSR + #elif defined(_MSC_VER) && (_MSC_VER >= 1300) + // #if defined(MY_CPU_ARM_OR_ARM64) + #define LZMA_LOG_BSR + // #endif + #endif +#endif + +// #include #ifdef LZMA_LOG_BSR -#define kDicLogSizeMaxCompress 30 +#if defined(__clang__) \ + || defined(__GNUC__) + +/* + C code: : (30 - __builtin_clz(x)) + gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31) + clang10 for x64 : 31 + (bsr(x) xor -32) +*/ + + #define MY_clz(x) ((unsigned)__builtin_clz(x)) + // __lzcnt32 + // __builtin_ia32_lzcnt_u32 + +#else // #if defined(_MSC_VER) + + #ifdef MY_CPU_ARM_OR_ARM64 + + #define MY_clz _CountLeadingZeros + + #else // if defined(MY_CPU_X86_OR_AMD64) + + // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU) + // _BitScanReverse code is not optimal for some MSVC compilers + #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \ + res = (zz + zz) + (pos >> zz); } + + #endif // MY_CPU_X86_OR_AMD64 + +#endif // _MSC_VER -#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); } -SizeT GetPosSlot1(SizeT pos) +#ifndef BSR2_RET + + #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \ + res = (zz + zz) + (pos >> zz); } + +#endif + + +unsigned GetPosSlot1(UInt32 pos); +unsigned GetPosSlot1(UInt32 pos) { - SizeT res; + unsigned res; BSR2_RET(pos, res); return res; } #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } -#else -#define kNumLogBits (9 + (int)sizeof(size_t) / 2) +#else // ! LZMA_LOG_BSR + +#define kNumLogBits (11 + sizeof(size_t) / 8 * 3) + #define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) -void LzmaEnc_FastPosInit(Byte *g_FastPos) +static void LzmaEnc_FastPosInit(Byte *g_FastPos) { - ptrdiff_t c = 2, slotFast; + unsigned slot; g_FastPos[0] = 0; g_FastPos[1] = 1; - - for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) - { - SizeT k = (1LL << ((slotFast >> 1) - 1)); - SizeT j; - for (j = 0; j < k; j++, c++) - g_FastPos[c] = (Byte)slotFast; + g_FastPos += 2; + + for (slot = 2; slot < kNumLogBits * 2; slot++) + { + size_t k = ((size_t)1 << ((slot >> 1) - 1)); + size_t j; + for (j = 0; j < k; j++) + g_FastPos[j] = (Byte)slot; + g_FastPos += k; } } -#define BSR2_RET(pos, res) { i = 6 + ((kNumLogBits - 1) & \ - (0 - (((((SizeT)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ - res = p->g_FastPos[pos >> i] + (i * 2); } +/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } + /* #define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ -p->g_FastPos[pos >> 6] + 12 : \ -p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } */ #define GetPosSlot1(pos) p->g_FastPos[pos] #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } -#endif +#endif // LZMA_LOG_BSR #define LZMA_NUM_REPS 4 -typedef SizeT CState; +typedef UInt16 CState; +typedef UInt16 CExtra; typedef struct { - SizeT price; - + UInt32 price; CState state; - int prev1IsChar; - int prev2; - - SizeT posPrev2; - SizeT backPrev2; - - SizeT posPrev; - SizeT backPrev; - SizeT backs[LZMA_NUM_REPS]; + CExtra extra; + // 0 : normal + // 1 : LIT : MATCH + // > 1 : MATCH (extra-1) : LIT : REP0 (len) + UInt32 len; + UInt32 dist; + UInt32 reps[LZMA_NUM_REPS]; } COptimal; -#define kNumOpts (1 << 12) + +// 18.06 +#define kNumOpts (1 << 11) +#define kPackReserve (kNumOpts * 8) +// #define kNumOpts (1 << 12) +// #define kPackReserve (1 + kNumOpts * 2) #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 -#define kDicLogSizeM0 +// #define kDicLogSizeMin 0 #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) - #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kAlignMask (kAlignTableSize - 1) #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 -#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) - #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) +typedef #ifdef _LZMA_PROB32 -#define CLzmaProb SizeT + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; #define LZMA_PB_MAX 4 #define LZMA_LC_MAX 8 @@ -189,208 +306,247 @@ typedef struct #define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) #define LZMA_MATCH_LEN_MIN 2 #define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) #define kNumStates 12 + typedef struct { - CLzmaProb choice; - CLzmaProb choice2; - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; - CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; CLzmaProb high[kLenNumHighSymbols]; } CLenEnc; + typedef struct { - CLenEnc p; - SizeT prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - SizeT tableSize; - SizeT counters[LZMA_NUM_PB_STATES_MAX]; + unsigned tableSize; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2]; + // UInt32 prices2[kLenNumSymbolsTotal]; } CLenPriceEnc; +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN]) + +/* +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9))) +*/ + typedef struct { - SizeT range; - SizeT low; - SizeT cacheSize; - SizeT processed; - ISeqOutStream *outStream; - SRes res; - Byte cache; + UInt32 range; + unsigned cache; + UInt64 low; + UInt64 cacheSize; Byte *buf; Byte *bufLim; Byte *bufBase; + ISeqOutStream *outStream; + UInt64 processed; + SRes res; } CRangeEnc; + typedef struct { CLzmaProb *litProbs; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + unsigned state; + UInt32 reps[LZMA_NUM_REPS]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; CLzmaProb isRep[kNumStates]; CLzmaProb isRepG0[kNumStates]; CLzmaProb isRepG1[kNumStates]; CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - SizeT reps[LZMA_NUM_REPS]; - SizeT state; } CSaveState; + +typedef UInt32 CProbPrice; + + typedef struct { - IMatchFinder matchFinder; void *matchFinderObj; + IMatchFinder2 matchFinder; -#ifndef _7ZIP_ST - Bool mtMode; - CMatchFinderMt matchFinderMt; -#endif + unsigned optCur; + unsigned optEnd; - CMatchFinder matchFinderBase; + unsigned longestMatchLen; + unsigned numPairs; + UInt32 numAvail; -#ifndef _7ZIP_ST - Byte pad[128]; -#endif + unsigned state; + unsigned numFastBytes; + unsigned additionalOffset; + UInt32 reps[LZMA_NUM_REPS]; + unsigned lpMask, pbMask; + CLzmaProb *litProbs; + CRangeEnc rc; - SizeT optimumEndIndex; - SizeT optimumCurrentIndex; + UInt32 backRes; - SizeT longestMatchLength; - SizeT numPairs; - SizeT numAvail; - COptimal opt[kNumOpts]; + unsigned lc, lp, pb; + unsigned lclp; -#ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; -#endif + BoolInt fastMode; + BoolInt writeEndMark; + BoolInt finished; + BoolInt multiThread; + BoolInt needInit; + // BoolInt _maxMode; - SizeT ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - SizeT matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; - SizeT numFastBytes; - SizeT additionalOffset; - SizeT reps[LZMA_NUM_REPS]; - SizeT state; + UInt64 nowPos64; + + unsigned matchPriceCount; + // unsigned alignPriceCount; + int repLenEncCounter; - SizeT posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; - SizeT distancesPrices[kNumLenToPosStates][kNumFullDistances]; - SizeT alignPrices[kAlignTableSize]; - SizeT alignPriceCount; + unsigned distTableSize; - SizeT distTableSize; + UInt32 dictSize; + SRes result; - SizeT lc, lp, pb; - SizeT lpMask, pbMask; + #ifndef _7ZIP_ST + BoolInt mtMode; + // begin of CMatchFinderMt is used in LZ thread + CMatchFinderMt matchFinderMt; + // end of CMatchFinderMt is used in BT and HASH threads + // #else + // CMatchFinder matchFinderBase; + #endif + CMatchFinder matchFinderBase; - CLzmaProb *litProbs; + + // we suppose that we have 8-bytes alignment after CMatchFinder + + #ifndef _7ZIP_ST + Byte pad[128]; + #endif + + // LZ thread + CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + // we want {len , dist} pairs to be 8-bytes aligned in matches array + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2]; + + // we want 8-bytes alignment here + UInt32 alignPrices[kAlignTableSize]; + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; CLzmaProb isRep[kNumStates]; CLzmaProb isRepG0[kNumStates]; CLzmaProb isRepG1[kNumStates]; CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif CLenPriceEnc lenEnc; CLenPriceEnc repLenEnc; - ptrdiff_t lclp; - - Bool fastMode; + COptimal opt[kNumOpts]; - CRangeEnc rc; + CSaveState saveState; - Bool writeEndMark; - UInt64 nowPos64; - SizeT matchPriceCount; - Bool finished; - Bool multiThread; + // BoolInt mf_Failure; + #ifndef _7ZIP_ST + Byte pad2[128]; + #endif +} CLzmaEnc; - SRes result; - SizeT dictSize; - SizeT matchFinderCycles; - int needInit; +#define MFB (p->matchFinderBase) +/* +#ifndef _7ZIP_ST +#define MFB (p->matchFinderMt.MatchFinder) +#endif +*/ - CSaveState saveState; -} CLzmaEnc; +#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); void LzmaEnc_SaveState(CLzmaEncHandle pp) { CLzmaEnc *p = (CLzmaEnc *)pp; CSaveState *dest = &p->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, ((SizeT)0x300 << p->lclp) * sizeof(CLzmaProb)); + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); + + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); } + void LzmaEnc_RestoreState(CLzmaEncHandle pp) { CLzmaEnc *dest = (CLzmaEnc *)pp; const CSaveState *p = &dest->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; + dest->state = p->state; - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, ((SizeT)0x300 << dest->lclp) * sizeof(CLzmaProb)); + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); + + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); } + + SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) { CLzmaEnc *p = (CLzmaEnc *)pp; @@ -398,102 +554,129 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) LzmaEncProps_Normalize(&props); if (props.lc > LZMA_LC_MAX - || props.lp > LZMA_LP_MAX - || props.pb > LZMA_PB_MAX - || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) - || props.dictSize > kLzmaMaxHistorySize) //-V590 + || props.lp > LZMA_LP_MAX + || props.pb > LZMA_PB_MAX) return SZ_ERROR_PARAM; + + if (props.dictSize > kLzmaMaxHistorySize) + props.dictSize = kLzmaMaxHistorySize; + + #ifndef LZMA_LOG_BSR + { + const UInt64 dict64 = props.dictSize; + if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress)) + return SZ_ERROR_PARAM; + } + #endif + p->dictSize = props.dictSize; - p->matchFinderCycles = props.mc; { - SizeT fb = props.fb; + unsigned fb = (unsigned)props.fb; if (fb < 5) fb = 5; if (fb > LZMA_MATCH_LEN_MAX) fb = LZMA_MATCH_LEN_MAX; p->numFastBytes = fb; } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; + p->lc = (unsigned)props.lc; + p->lp = (unsigned)props.lp; + p->pb = (unsigned)props.pb; p->fastMode = (props.algo == 0); - p->matchFinderBase.btMode = props.btMode; + // p->_maxMode = True; + MFB.btMode = (Byte)(props.btMode ? 1 : 0); { - SizeT numHashBytes = 4; + unsigned numHashBytes = 4; if (props.btMode) { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; + if (props.numHashBytes < 2) numHashBytes = 2; + else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes; } - p->matchFinderBase.numHashBytes = numHashBytes; + if (props.numHashBytes >= 5) numHashBytes = 5; + + MFB.numHashBytes = numHashBytes; } - p->matchFinderBase.cutValue = props.mc; + MFB.cutValue = props.mc; - p->writeEndMark = props.writeEndMark & 0x1LL; + p->writeEndMark = (BoolInt)props.writeEndMark; -#ifndef _7ZIP_ST + #ifndef _7ZIP_ST /* if (newMultiThread != _multiThread) { - ReleaseMatchFinder(); - _multiThread = newMultiThread; + ReleaseMatchFinder(); + _multiThread = newMultiThread; } */ p->multiThread = (props.numThreads > 1); -#endif + p->matchFinderMt.btSync.affinity = + p->matchFinderMt.hashSync.affinity = props.affinity; + #endif return SZ_OK; } -static const int kLiteralNextStates[kNumStates] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; -static const int kMatchNextStates[kNumStates] = { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 }; -static const int kRepNextStates[kNumStates] = { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 }; -static const int kShortRepNextStates[kNumStates] = { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 }; -#define IsCharState(s) ((s) < 7) +void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + MFB.expectedDataSize = expectedDataSiize; +} + + +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 + +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) #define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) #define kInfinityPrice (1 << 30) static void RangeEnc_Construct(CRangeEnc *p) { - p->outStream = 0; - p->bufBase = 0; + p->outStream = NULL; + p->bufBase = NULL; } -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) #define RC_BUF_SIZE (1 << 16) -static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc) + +static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) { - if (p->bufBase == 0) + if (!p->bufBase) { - p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE); - if (p->bufBase == 0) + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); + if (!p->bufBase) return 0; p->bufLim = p->bufBase + RC_BUF_SIZE; } return 1; } -static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc) +static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->bufBase); - p->bufBase = 0; + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; } static void RangeEnc_Init(CRangeEnc *p) { - /* Stream.Init(); */ - p->low = 0; - p->range = 0xFFFFFFFFLL; - p->cacheSize = 1; + p->range = 0xFFFFFFFF; p->cache = 0; + p->low = 0; + p->cacheSize = 0; p->buf = p->bufBase; @@ -501,36 +684,48 @@ static void RangeEnc_Init(CRangeEnc *p) p->res = SZ_OK; } -static void RangeEnc_FlushStream(CRangeEnc *p) +MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != p->outStream->Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; + const size_t num = (size_t)(p->buf - p->bufBase); + if (p->res == SZ_OK) + { + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + } p->processed += num; p->buf = p->bufBase; } -static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) { - if ((SizeT)p->low < 0xFF000000LL || (ptrdiff_t)(p->low >> 32) != 0) + UInt32 low = (UInt32)p->low; + unsigned high = (unsigned)(p->low >> 32); + p->low = (UInt32)(low << 8); + if (low < (UInt32)0xFF000000 || high != 0) { - Byte temp = p->cache; - do { Byte *buf = p->buf; - *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); + *buf++ = (Byte)(p->cache + high); + p->cache = (unsigned)(low >> 24); p->buf = buf; if (buf == p->bufLim) RangeEnc_FlushStream(p); - temp = 0xFF; - } while (--p->cacheSize != 0); - p->cache = (Byte)((SizeT)p->low >> 24); + if (p->cacheSize == 0) + return; + } + high += 0xFF; + for (;;) + { + Byte *buf = p->buf; + *buf++ = (Byte)(high); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (--p->cacheSize == 0) + return; + } } p->cacheSize++; - p->low = (p->low << 8) & 0xFFFFFFFFLL; } static void RangeEnc_FlushData(CRangeEnc *p) @@ -540,634 +735,788 @@ static void RangeEnc_FlushData(CRangeEnc *p) RangeEnc_ShiftLow(p); } -static void RangeEnc_EncodeDirectBits(CRangeEnc *p, SizeT value, ptrdiff_t numBits) -{ - do - { - p->range >>= 1; - p->low += p->range & (0 - ((value >> --numBits) & 1)); - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } - } while (numBits != 0); -} +#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } -static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, SizeT symbol) -{ - SizeT ttt = *prob; - SizeT newBound = (p->range >> kNumBitModelTotalBits) * ttt; - if (symbol == 0) - { - p->range = newBound; - ttt += (kBitModelTotal - ttt) >> kNumMoveBits; - } - else - { - p->low += newBound; - p->range -= newBound; - ttt -= ttt >> kNumMoveBits; +#define RC_BIT_PRE(p, prob) \ + ttt = *(prob); \ + newBound = (range >> kNumBitModelTotalBits) * ttt; + +// #define _LZMA_ENC_USE_BRANCH + +#ifdef _LZMA_ENC_USE_BRANCH + +#define RC_BIT(p, prob, bit) { \ + RC_BIT_PRE(p, prob) \ + if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ + else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ } - *prob = (CLzmaProb)ttt; - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); + +#else + +#define RC_BIT(p, prob, bit) { \ + UInt32 mask; \ + RC_BIT_PRE(p, prob) \ + mask = 0 - (UInt32)bit; \ + range &= mask; \ + mask &= newBound; \ + range -= mask; \ + (p)->low += mask; \ + mask = (UInt32)bit - 1; \ + range += newBound & mask; \ + mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ + mask += ((1 << kNumMoveBits) - 1); \ + ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ } + +#endif + + + + +#define RC_BIT_0_BASE(p, prob) \ + range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + +#define RC_BIT_1_BASE(p, prob) \ + range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ + +#define RC_BIT_0(p, prob) \ + RC_BIT_0_BASE(p, prob) \ + RC_NORM(p) + +#define RC_BIT_1(p, prob) \ + RC_BIT_1_BASE(p, prob) \ + RC_NORM(p) + +static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) +{ + UInt32 range, ttt, newBound; + range = p->range; + RC_BIT_PRE(p, prob) + RC_BIT_0(p, prob) + p->range = range; } -static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, SizeT symbol) +static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) { - symbol |= 0x100; + UInt32 range = p->range; + sym |= 0x100; do { - RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); - symbol <<= 1; - } while (symbol < 0x10000); + UInt32 ttt, newBound; + // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1); + CLzmaProb *prob = probs + (sym >> 8); + UInt32 bit = (sym >> 7) & 1; + sym <<= 1; + RC_BIT(p, prob, bit); + } + while (sym < 0x10000); + p->range = range; } -static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, SizeT symbol, SizeT matchByte) +static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte) { - SizeT offs = 0x100; - symbol |= 0x100; + UInt32 range = p->range; + UInt32 offs = 0x100; + sym |= 0x100; do { + UInt32 ttt, newBound; + CLzmaProb *prob; + UInt32 bit; matchByte <<= 1; - RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } while (symbol < 0x10000); + // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1); + prob = probs + (offs + (matchByte & offs) + (sym >> 8)); + bit = (sym >> 7) & 1; + sym <<= 1; + offs &= ~(matchByte ^ sym); + RC_BIT(p, prob, bit); + } + while (sym < 0x10000); + p->range = range; } -void LzmaEnc_InitPriceTables(SizeT *ProbPrices) + + +static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) { - SizeT i; - for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) + UInt32 i; + for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) { - const ptrdiff_t kCyclesBits = kNumBitPriceShiftBits; - SizeT w = i; - SizeT bitCount = 0; - ptrdiff_t j; + const unsigned kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); + unsigned bitCount = 0; + unsigned j; for (j = 0; j < kCyclesBits; j++) { w = w * w; bitCount <<= 1; - while (w >= (1LL << 16)) + while (w >= ((UInt32)1 << 16)) { w >>= 1; bitCount++; } } - ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15LL - bitCount); + ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + // printf("\n%3d: %5d", i, ProbPrices[i]); } } -#define GET_PRICE(prob, symbol) \ - p->ProbPrices[((prob) ^ (((-(ptrdiff_t)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; +#define GET_PRICE(prob, bit) \ + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; -#define GET_PRICEa(prob, symbol) \ - ProbPrices[((prob) ^ ((-((ptrdiff_t)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; +#define GET_PRICEa(prob, bit) \ + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] -#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] +#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + -static SizeT LitEnc_GetPrice(const CLzmaProb *probs, SizeT symbol, const SizeT *ProbPrices) +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices) { - SizeT price = 0; - symbol |= 0x100; + UInt32 price = 0; + sym |= 0x100; do { - price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); - symbol <<= 1; - } while (symbol < 0x10000); + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); return price; } -static SizeT LitEnc_GetPriceMatched(const CLzmaProb *probs, SizeT symbol, SizeT matchByte, const SizeT *ProbPrices) + +static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices) { - SizeT price = 0; - SizeT offs = 0x100; - symbol |= 0x100; + UInt32 price = 0; + UInt32 offs = 0x100; + sym |= 0x100; do { matchByte <<= 1; - price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } while (symbol < 0x10000); + price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1); + sym <<= 1; + offs &= ~(matchByte ^ sym); + } + while (sym < 0x10000); return price; } -static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, ptrdiff_t numBitLevels, SizeT symbol) +static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym) { - SizeT m = 1; - ptrdiff_t i; - for (i = numBitLevels; i != 0;) - { - SizeT bit; - i--; - bit = (symbol >> i) & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - } -} - -static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, ptrdiff_t numBitLevels, SizeT symbol) -{ - SizeT m = 1; - ptrdiff_t i; - for (i = 0; i < numBitLevels; i++) + UInt32 range = rc->range; + unsigned m = 1; + do { - SizeT bit = symbol & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); + UInt32 ttt, newBound; + unsigned bit = sym & 1; + // RangeEnc_EncodeBit(rc, probs + m, bit); + sym >>= 1; + RC_BIT(rc, probs + m, bit); m = (m << 1) | bit; - symbol >>= 1; } + while (--numBits); + rc->range = range; } -static SizeT RcTree_GetPrice(const CLzmaProb *probs, ptrdiff_t numBitLevels, SizeT symbol, const SizeT *ProbPrices) -{ - SizeT price = 0; - symbol |= (1LL << numBitLevels); - while (symbol != 1) - { - price += GET_PRICEa(probs[symbol >> 1], symbol & 1); - symbol >>= 1; - } - return price; -} - -static SizeT RcTree_ReverseGetPrice(const CLzmaProb *probs, ptrdiff_t numBitLevels, SizeT symbol, const SizeT *ProbPrices) -{ - SizeT price = 0; - SizeT m = 1; - ptrdiff_t i; - for (i = numBitLevels; i != 0; i--) - { - SizeT bit = symbol & 1; - symbol >>= 1; - price += GET_PRICEa(probs[m], bit); - m = (m << 1) | bit; - } - return price; -} static void LenEnc_Init(CLenEnc *p) { unsigned i; - p->choice = p->choice2 = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) p->low[i] = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) - p->mid[i] = kProbInitValue; for (i = 0; i < kLenNumHighSymbols; i++) p->high[i] = kProbInitValue; } -static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, SizeT symbol, SizeT posState) +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState) { - if (symbol < kLenNumLowSymbols) + UInt32 range, ttt, newBound; + CLzmaProb *probs = p->low; + range = rc->range; + RC_BIT_PRE(rc, probs); + if (sym >= kLenNumLowSymbols) { - RangeEnc_EncodeBit(rc, &p->choice, 0); - RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice, 1); - if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) + RC_BIT_1(rc, probs); + probs += kLenNumLowSymbols; + RC_BIT_PRE(rc, probs); + if (sym >= kLenNumLowSymbols * 2) { - RangeEnc_EncodeBit(rc, &p->choice2, 0); - RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice2, 1); - RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); + RC_BIT_1(rc, probs); + rc->range = range; + // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); + LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); + return; } + sym -= kLenNumLowSymbols; } -} -static void LenEnc_SetPrices(const CLenEnc *p, SizeT posState, SizeT numSymbols, SizeT *prices, const SizeT *ProbPrices) -{ - SizeT a0 = GET_PRICE_0a(p->choice); - SizeT a1 = GET_PRICE_1a(p->choice); - SizeT b0 = a1 + GET_PRICE_0a(p->choice2); - SizeT b1 = a1 + GET_PRICE_1a(p->choice2); - SizeT i = 0; - for (i = 0; i < kLenNumLowSymbols; i++) - { - if (i >= numSymbols) - return; - prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); - } - for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym); { - if (i >= numSymbols) - return; - prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); + unsigned m; + unsigned bit; + RC_BIT_0(rc, probs); + probs += (posState << (1 + kLenNumLowBits)); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit); + rc->range = range; } - for (; i < numSymbols; i++) - prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); } -static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, SizeT posState, const SizeT *ProbPrices) +static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) { - LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); - p->counters[posState] = p->tableSize; + unsigned i; + for (i = 0; i < 8; i += 2) + { + UInt32 price = startPrice; + UInt32 prob; + price += GET_PRICEa(probs[1 ], (i >> 2)); + price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); + prob = probs[4 + (i >> 1)]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + } } -static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, SizeT numPosStates, const SizeT *ProbPrices) -{ - SizeT posState; - for (posState = 0; posState < numPosStates; posState++) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} -static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, SizeT symbol, SizeT posState, Bool updatePrice, const SizeT *ProbPrices) +MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( + CLenPriceEnc *p, + unsigned numPosStates, + const CLenEnc *enc, + const CProbPrice *ProbPrices) { - LenEnc_Encode(&p->p, rc, symbol, posState); - if (updatePrice) - if (--p->counters[posState] == 0) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} + UInt32 b; + + { + unsigned prob = enc->low[0]; + UInt32 a, c; + unsigned posState; + b = GET_PRICEa_1(prob); + a = GET_PRICEa_0(prob); + c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (posState = 0; posState < numPosStates; posState++) + { + UInt32 *prices = p->prices[posState]; + const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits)); + SetPrices_3(probs, a, prices, ProbPrices); + SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices); + } + } + /* + { + unsigned i; + UInt32 b; + a = GET_PRICEa_0(enc->low[0]); + for (i = 0; i < kLenNumLowSymbols; i++) + p->prices2[i] = a; + a = GET_PRICEa_1(enc->low[0]); + b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++) + p->prices2[i] = b; + a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + } + */ + + // p->counter = numSymbols; + // p->counter = 64; + { + unsigned i = p->tableSize; + + if (i > kLenNumLowSymbols * 2) + { + const CLzmaProb *probs = enc->high; + UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2; + i -= kLenNumLowSymbols * 2 - 1; + i >>= 1; + b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + do + { + /* + p->prices2[i] = a + + // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); + LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices); + */ + // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices); + unsigned sym = --i + (1 << (kLenNumHighBits - 1)); + UInt32 price = b; + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + { + unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); + prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); + } + } + while (i); -static void MovePos(CLzmaEnc *p, SizeT num) -{ -#ifdef SHOW_STAT - ttt += num; - printf("\n MovePos %d", num); -#endif - if (num != 0) - { - p->additionalOffset += num; - p->matchFinder.Skip(p->matchFinderObj, num); + { + unsigned posState; + size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + for (posState = 1; posState < numPosStates; posState++) + memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); + } + } } } -static SizeT ReadMatchDistances(CLzmaEnc *p, SizeT *numDistancePairsRes) +/* + #ifdef SHOW_STAT + g_STAT_OFFSET += num; + printf("\n MovePos %u", num); + #endif +*/ + +#define MOVE_POS(p, num) { \ + p->additionalOffset += (num); \ + p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); } + + +static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { - SizeT lenRes = 0, numPairs; + unsigned numPairs; + + p->additionalOffset++; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); -#ifdef SHOW_STAT - printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); - ttt++; { - SizeT i; + const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; } + numPairs = (unsigned)(d - p->matches); + } + *numPairsRes = numPairs; + + #ifdef SHOW_STAT + printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); + g_STAT_OFFSET++; + { + unsigned i; for (i = 0; i < numPairs; i += 2) - printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); + printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); } -#endif - if (numPairs > 0) + #endif + + if (numPairs == 0) + return 0; { - lenRes = p->matches[numPairs - 2]; - if (lenRes == p->numFastBytes) + const unsigned len = p->matches[(size_t)numPairs - 2]; + if (len != p->numFastBytes) + return len; { - const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - SizeT distance = p->matches[numPairs - 1] + 1; - SizeT numAvail = p->numAvail; + UInt32 numAvail = p->numAvail; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; { - const Byte *pby2 = pby - distance; - for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++); + const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + const Byte *p2 = p1 + len; + const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; + const Byte *lim = p1 + numAvail; + for (; p2 != lim && *p2 == p2[dif]; p2++) + {} + return (unsigned)(p2 - p1); } } } - p->additionalOffset++; - *numDistancePairsRes = numPairs; - return lenRes; } +#define MARK_LIT ((UInt32)(Int32)-1) -#define MakeAsChar(p) (p)->backPrev = (SizeT)(-1); (p)->prev1IsChar = False; -#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False; -#define IsShortRep(p) ((p)->backPrev == 0) +#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } +#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } +#define IsShortRep(p) ((p)->dist == 0) -static SizeT GetRepLen1Price(const CLzmaEnc *p, SizeT state, SizeT posState) -{ - return - GET_PRICE_0(p->isRepG0[state]) + - GET_PRICE_0(p->isRep0Long[state][posState]); -} -static SizeT GetPureRepPrice(const CLzmaEnc *p, SizeT repIndex, SizeT state, SizeT posState) +#define GetPrice_ShortRep(p, state, posState) \ + ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) + +#define GetPrice_Rep_0(p, state, posState) ( \ + GET_PRICE_1(p->isMatch[state][posState]) \ + + GET_PRICE_1(p->isRep0Long[state][posState])) \ + + GET_PRICE_1(p->isRep[state]) \ + + GET_PRICE_0(p->isRepG0[state]) + +MY_FORCE_INLINE +static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) { - SizeT price; + UInt32 price; + UInt32 prob = p->isRepG0[state]; if (repIndex == 0) { - price = GET_PRICE_0(p->isRepG0[state]); + price = GET_PRICE_0(prob); price += GET_PRICE_1(p->isRep0Long[state][posState]); } else { - price = GET_PRICE_1(p->isRepG0[state]); + price = GET_PRICE_1(prob); + prob = p->isRepG1[state]; if (repIndex == 1) - price += GET_PRICE_0(p->isRepG1[state]); + price += GET_PRICE_0(prob); else { - price += GET_PRICE_1(p->isRepG1[state]); + price += GET_PRICE_1(prob); price += GET_PRICE(p->isRepG2[state], repIndex - 2); } } return price; } -static SizeT GetRepPrice(const CLzmaEnc *p, SizeT repIndex, SizeT len, SizeT state, SizeT posState) -{ - return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + - GetPureRepPrice(p, repIndex, state, posState); -} -static SizeT Backward(CLzmaEnc *p, SizeT *backRes, SizeT cur) +static unsigned Backward(CLzmaEnc *p, unsigned cur) { - SizeT posMem = p->opt[cur].posPrev; - SizeT backMem = p->opt[cur].backPrev; - p->optimumEndIndex = cur; - do + unsigned wr = cur + 1; + p->optEnd = wr; + + for (;;) { - if (p->opt[cur].prev1IsChar) + UInt32 dist = p->opt[cur].dist; + unsigned len = (unsigned)p->opt[cur].len; + unsigned extra = (unsigned)p->opt[cur].extra; + cur -= len; + + if (extra) { - MakeAsChar(&p->opt[posMem]) - p->opt[posMem].posPrev = posMem - 1; - if (p->opt[cur].prev2) + wr--; + p->opt[wr].len = (UInt32)len; + cur -= extra; + len = extra; + if (extra == 1) { - p->opt[posMem - 1].prev1IsChar = False; - p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; - p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; + p->opt[wr].dist = dist; + dist = MARK_LIT; + } + else + { + p->opt[wr].dist = 0; + len--; + wr--; + p->opt[wr].dist = MARK_LIT; + p->opt[wr].len = 1; } } - { - SizeT posPrev = posMem; - SizeT backCur = backMem; - - backMem = p->opt[posPrev].backPrev; - posMem = p->opt[posPrev].posPrev; - - p->opt[posPrev].backPrev = backCur; - p->opt[posPrev].posPrev = cur; - cur = posPrev; - } - } while (cur != 0); - *backRes = p->opt[0].backPrev; - p->optimumCurrentIndex = p->opt[0].posPrev; - return p->optimumCurrentIndex; -} - -#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * (SizeT)0x300) - -static SizeT GetOptimum(CLzmaEnc *p, SizeT position, SizeT *backRes) //-V2008 -{ - SizeT matchPrice, repMatchPrice; - SizeT posState, mainLen, numPairs, lenEnd, cur, len, numAvail; - SizeT repMaxIndex, i; - SizeT normalMatchPrice; - SizeT reps[LZMA_NUM_REPS]; - SizeT repLens[LZMA_NUM_REPS]; - SizeT *matches; - const Byte *data; - Byte curByte, matchByte; - if (p->optimumEndIndex != p->optimumCurrentIndex) - { - const COptimal *opt = &p->opt[p->optimumCurrentIndex]; - SizeT lenRes = opt->posPrev - p->optimumCurrentIndex; - *backRes = opt->backPrev; - p->optimumCurrentIndex = opt->posPrev; - return lenRes; - } - p->optimumCurrentIndex = p->optimumEndIndex = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) - { - *backRes = (SizeT)(-1); - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - { - SizeT lenTest; - const Byte *data2; - reps[i] = p->reps[i]; - data2 = data - (reps[i] + (SizeT)1); - if (data[0] != data2[0] || data[1] != data2[1]) + if (cur == 0) { - repLens[i] = 0; - continue; + p->backRes = dist; + p->optCur = wr; + return len; } - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); - repLens[i] = lenTest; - if (lenTest > repLens[repMaxIndex]) - repMaxIndex = i; + + wr--; + p->opt[wr].dist = dist; + p->opt[wr].len = (UInt32)len; } - if (repLens[repMaxIndex] >= p->numFastBytes) - { - SizeT lenRes; - *backRes = repMaxIndex; - lenRes = repLens[repMaxIndex]; - MovePos(p, lenRes - 1); - return lenRes; - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) - { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - curByte = *data; - matchByte = *(data - (reps[0] + (SizeT)1)); +} - if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) - { - *backRes = (SizeT)-1; - return 1; - } - p->opt[0].state = (CState)p->state; - posState = (position & p->pbMask); +#define LIT_PROBS(pos, prevByte) \ + (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc)) - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsCharState(p->state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAsChar(&p->opt[1]); - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); +static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) +{ + unsigned last, cur; + UInt32 reps[LZMA_NUM_REPS]; + unsigned repLens[LZMA_NUM_REPS]; + UInt32 *matches; - if (matchByte == curByte) { - SizeT shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) + UInt32 numAvail; + unsigned numPairs, mainLen, repMaxIndex, i, posState; + UInt32 matchPrice, repMatchPrice; + const Byte *data; + Byte curByte, matchByte; + + p->optCur = p->optEnd = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else { - p->opt[1].price = shortRepPrice; - MakeAsShortRep(&p->opt[1]); + mainLen = p->longestMatchLen; + numPairs = p->numPairs; } - } - lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); - - if (lenEnd < 2) - { - *backRes = p->opt[1].backPrev; - return 1; - } - - p->opt[1].posPrev = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->opt[0].backs[i] = reps[i]; - - len = lenEnd; - do - p->opt[len--].price = kInfinityPrice; - while (len >= 2); - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - SizeT repLen = repLens[i]; - SizeT price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); - do + + numAvail = p->numAvail; + if (numAvail < 2) + { + p->backRes = MARK_LIT; + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) { - SizeT curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; - COptimal *opt = &p->opt[repLen]; - if (curAndLenPrice < opt->price) + unsigned len; + const Byte *data2; + reps[i] = p->reps[i]; + data2 = data - reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = i; - opt->prev1IsChar = False; + repLens[i] = 0; + continue; } - } while (--repLen >= 2); - } - - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + repLens[i] = len; + if (len > repLens[repMaxIndex]) + repMaxIndex = i; + if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization + break; + } + + if (repLens[repMaxIndex] >= p->numFastBytes) + { + unsigned len; + p->backRes = (UInt32)repMaxIndex; + len = repLens[repMaxIndex]; + MOVE_POS(p, len - 1) + return len; + } + + matches = p->matches; + #define MATCHES matches + // #define MATCHES p->matches + + if (mainLen >= p->numFastBytes) + { + p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + curByte = *data; + matchByte = *(data - reps[0]); - len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); - if (len <= mainLen) - { - SizeT offs = 0; - while (len > matches[offs]) - offs += 2; - for (; ; len++) + last = repLens[repMaxIndex]; + if (last <= mainLen) + last = mainLen; + + if (last < 2 && curByte != matchByte) + { + p->backRes = MARK_LIT; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + { - COptimal *opt; - SizeT distance = matches[offs + 1]; + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsLitState(p->state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } - SizeT curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; - SizeT lenToPosState = GetLenToPosState(len); - if (distance < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][distance]; - else + MakeAs_Lit(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + // 18.06 + if (matchByte == curByte && repLens[0] == 0) + { + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) { - SizeT slot; - GetPosSlot2(distance, slot); - curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; + p->opt[1].price = shortRepPrice; + MakeAs_ShortRep(&p->opt[1]); } - opt = &p->opt[len]; - if (curAndLenPrice < opt->price) + if (last < 2) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = distance + LZMA_NUM_REPS; - opt->prev1IsChar = False; + p->backRes = p->opt[1].dist; + return 1; } - if (len == matches[offs]) + } + + p->opt[1].len = 1; + + p->opt[0].reps[0] = reps[0]; + p->opt[0].reps[1] = reps[1]; + p->opt[0].reps[2] = reps[2]; + p->opt[0].reps[3] = reps[3]; + + // ---------- REP ---------- + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); + do { - offs += 2; - if (offs == numPairs) - break; + UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen); + COptimal *opt = &p->opt[repLen]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)repLen; + opt->dist = (UInt32)i; + opt->extra = 0; + } } + while (--repLen >= 2); } - } + + + // ---------- MATCH ---------- + { + unsigned len = repLens[0] + 1; + if (len <= mainLen) + { + unsigned offs = 0; + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - cur = 0; + if (len < 2) + len = 2; + else + while (len > MATCHES[offs]) + offs += 2; + + for (; ; len++) + { + COptimal *opt; + UInt32 dist = MATCHES[(size_t)offs + 1]; + UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); + unsigned lenToPosState = GetLenToPosState(len); + + if (dist < kNumFullDistances) + price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + { + unsigned slot; + GetPosSlot2(dist, slot); + price += p->alignPrices[dist & kAlignMask]; + price += p->posSlotPrices[lenToPosState][slot]; + } + + opt = &p->opt[len]; + + if (price < opt->price) + { + opt->price = price; + opt->len = (UInt32)len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + + if (len == MATCHES[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } + } + } + -#ifdef SHOW_STAT2 - if (position >= 0) - { - unsigned i; - printf("\n pos = %4X", position); - for (i = cur; i <= lenEnd; i++) - printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); + cur = 0; + + #ifdef SHOW_STAT2 + /* if (position >= 0) */ + { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= last; i++) + printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); + } + #endif } -#endif + + + + // ---------- Optimal Parsing ---------- for (;;) { - SizeT newLen, posPrev, numAvailFull, startLen; - SizeT state; - SizeT curPrice; - SizeT curAnd1Price; - Bool nextIsChar; - COptimal *curOpt; - COptimal *nextOpt; - SizeT offs, curBack, posSlot; - SizeT lenTest; + unsigned numAvail; + UInt32 numAvailFull; + unsigned newLen, numPairs, prev, state, posState, startLen; + UInt32 litPrice, matchPrice, repMatchPrice; + BoolInt nextIsLit; + Byte curByte, matchByte; + const Byte *data; + COptimal *curOpt, *nextOpt; - cur++; - if (cur == lenEnd) - return Backward(p, backRes, cur); + if (++cur == last) + break; + + // 18.06 + if (cur >= kNumOpts - 64) + { + unsigned j, best; + UInt32 price = p->opt[cur].price; + best = cur; + for (j = cur + 1; j <= last; j++) + { + UInt32 price2 = p->opt[j].price; + if (price >= price2) + { + price = price2; + best = j; + } + } + { + unsigned delta = best - cur; + if (delta != 0) + { + MOVE_POS(p, delta); + } + } + cur = best; + break; + } newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) { p->numPairs = numPairs; - p->longestMatchLength = newLen; - return Backward(p, backRes, cur); + p->longestMatchLen = newLen; + break; } - position++; + curOpt = &p->opt[cur]; - posPrev = curOpt->posPrev; - if (curOpt->prev1IsChar) - { - posPrev--; - if (curOpt->prev2) - { - state = p->opt[curOpt->posPrev2].state; - if (curOpt->backPrev2 < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - else - state = p->opt[posPrev].state; - state = kLiteralNextStates[state]; - } - else - state = p->opt[posPrev].state; - if (posPrev == cur - 1) + + position++; + + // we need that check here, if skip_items in p->opt are possible + /* + if (curOpt->price >= kInfinityPrice) + continue; + */ + + prev = cur - curOpt->len; + + if (curOpt->len == 1) { + state = (unsigned)p->opt[prev].state; if (IsShortRep(curOpt)) state = kShortRepNextStates[state]; else @@ -1175,348 +1524,499 @@ static SizeT GetOptimum(CLzmaEnc *p, SizeT position, SizeT *backRes) //-V2008 } else { - SizeT pos; const COptimal *prevOpt; - if (curOpt->prev1IsChar && curOpt->prev2) + UInt32 b0; + UInt32 dist = curOpt->dist; + + if (curOpt->extra) { - posPrev = curOpt->posPrev2; - pos = curOpt->backPrev2; - state = kRepNextStates[state]; + prev -= (unsigned)curOpt->extra; + state = kState_RepAfterLit; + if (curOpt->extra == 1) + state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit); } else { - pos = curOpt->backPrev; - if (pos < LZMA_NUM_REPS) + state = (unsigned)p->opt[prev].state; + if (dist < LZMA_NUM_REPS) state = kRepNextStates[state]; else state = kMatchNextStates[state]; } - prevOpt = &p->opt[posPrev]; - if (pos < LZMA_NUM_REPS) + + prevOpt = &p->opt[prev]; + b0 = prevOpt->reps[0]; + + if (dist < LZMA_NUM_REPS) { - reps[0] = prevOpt->backs[pos]; - for (i = 1; i <= pos; i++) - reps[i] = prevOpt->backs[i - 1]; - for (; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i]; + if (dist == 0) + { + reps[0] = b0; + reps[1] = prevOpt->reps[1]; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[1] = b0; + b0 = prevOpt->reps[1]; + if (dist == 1) + { + reps[0] = b0; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[2] = b0; + reps[0] = prevOpt->reps[dist]; + reps[3] = prevOpt->reps[dist ^ 1]; + } + } } else { - reps[0] = (pos - LZMA_NUM_REPS); - for (i = 1; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i - 1]; + reps[0] = (dist - LZMA_NUM_REPS + 1); + reps[1] = b0; + reps[2] = prevOpt->reps[1]; + reps[3] = prevOpt->reps[2]; } } + curOpt->state = (CState)state; + curOpt->reps[0] = reps[0]; + curOpt->reps[1] = reps[1]; + curOpt->reps[2] = reps[2]; + curOpt->reps[3] = reps[3]; - curOpt->backs[0] = reps[0]; - curOpt->backs[1] = reps[1]; - curOpt->backs[2] = reps[2]; - curOpt->backs[3] = reps[3]; - - curPrice = curOpt->price; - nextIsChar = False; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; curByte = *data; - matchByte = *(data - (reps[0] + (SizeT)1)); + matchByte = *(data - reps[0]); posState = (position & p->pbMask); - curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); + /* + The order of Price checks: + < LIT + <= SHORT_REP + < LIT : REP_0 + < REP [ : LIT : REP_0 ] + < MATCH [ : LIT : REP_0 ] + */ + { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - curAnd1Price += - (!IsCharState(state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + UInt32 curPrice = curOpt->price; + unsigned prob = p->isMatch[state][posState]; + matchPrice = curPrice + GET_PRICE_1(prob); + litPrice = curPrice + GET_PRICE_0(prob); } - nextOpt = &p->opt[cur + 1]; - - if (curAnd1Price < nextOpt->price) + nextOpt = &p->opt[(size_t)cur + 1]; + nextIsLit = False; + + // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice) + // 18.new.06 + if ((nextOpt->price < kInfinityPrice + // && !IsLitState(state) + && matchByte == curByte) + || litPrice > nextOpt->price + ) + litPrice = 0; + else { - nextOpt->price = curAnd1Price; - nextOpt->posPrev = cur; - MakeAsChar(nextOpt); - nextIsChar = True; + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + litPrice += (!IsLitState(state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + + if (litPrice < nextOpt->price) + { + nextOpt->price = litPrice; + nextOpt->len = 1; + MakeAs_Lit(nextOpt); + nextIsLit = True; + } } - matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + numAvailFull = p->numAvail; + { + unsigned temp = kNumOpts - 1 - cur; + if (numAvailFull > temp) + numAvailFull = (UInt32)temp; + } - if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) + // 18.06 + // ---------- SHORT_REP ---------- + if (IsLitState(state)) // 18.new + if (matchByte == curByte) + if (repMatchPrice < nextOpt->price) // 18.new + // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1)) + if ( + // nextOpt->price >= kInfinityPrice || + nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt + || (nextOpt->dist != 0 + // && nextOpt->extra <= 1 // 17.old + ) + ) { - SizeT shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); - if (shortRepPrice <= nextOpt->price) + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); + // if (shortRepPrice <= nextOpt->price) // 17.old + if (shortRepPrice < nextOpt->price) // 18.new { nextOpt->price = shortRepPrice; - nextOpt->posPrev = cur; - MakeAsShortRep(nextOpt); - nextIsChar = True; + nextOpt->len = 1; + MakeAs_ShortRep(nextOpt); + nextIsLit = False; } } - numAvailFull = p->numAvail; - { - SizeT temp = kNumOpts - 1 - cur; - if (temp < numAvailFull) - numAvailFull = temp; - } - + if (numAvailFull < 2) continue; numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - if (!nextIsChar && matchByte != curByte) /* speed optimization */ + // numAvail <= p->numFastBytes + + // ---------- LIT : REP_0 ---------- + + if (!nextIsLit + && litPrice != 0 // 18.new + && matchByte != curByte + && numAvailFull > 2) { - /* try Literal + rep0 */ - SizeT temp; - SizeT lenTest2; - const Byte *data2 = data - (reps[0] + (SizeT)1); - SizeT limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; - - for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++); - lenTest2 = temp - 1; - if (lenTest2 >= 2) + const Byte *data2 = data - reps[0]; + if (data[1] == data2[1] && data[2] == data2[2]) { - SizeT state2 = kLiteralNextStates[state]; - SizeT posStateNext = (position + 1) & p->pbMask; - SizeT nextRepMatchPrice = curAnd1Price + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - /* for (; lenTest2 >= 2; lenTest2--) */ + unsigned len; + unsigned limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + for (len = 3; len < limit && data[len] == data2[len]; len++) + {} + { - SizeT curAndLenPrice; - COptimal *opt; - SizeT offset = cur + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + unsigned state2 = kLiteralNextStates[state]; + unsigned posState2 = (position + 1) & p->pbMask; + UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); { - opt->price = curAndLenPrice; - opt->posPrev = cur + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = False; + unsigned offset = cur + len; + + if (last < offset) + last = offset; + + // do + { + UInt32 price2; + COptimal *opt; + len--; + // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len); + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len; + opt->dist = 0; + opt->extra = 1; + } + } + // while (len >= 3); } } } } - + startLen = 2; /* speed optimization */ + { - SizeT repIndex; - for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) + // ---------- REP ---------- + unsigned repIndex = 0; // 17.old + // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused + for (; repIndex < LZMA_NUM_REPS; repIndex++) { - SizeT lenTestTemp; - SizeT price; - const Byte *data2 = data - (reps[repIndex] + (SizeT)1); + unsigned len; + UInt32 price; + const Byte *data2 = data - reps[repIndex]; if (data[0] != data2[0] || data[1] != data2[1]) continue; - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); - while (lenEnd < cur + lenTest) - p->opt[++lenEnd].price = kInfinityPrice; - lenTestTemp = lenTest; - price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); - do + + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + + // if (len < startLen) continue; // 18.new: speed optimization + + { + unsigned offset = cur + len; + if (last < offset) + last = offset; + } { - SizeT curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; - COptimal *opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) + unsigned len2 = len; + price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); + do { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = repIndex; - opt->prev1IsChar = False; + UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2); + COptimal *opt = &p->opt[cur + len2]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->dist = (UInt32)repIndex; + opt->extra = 0; + } } - } while (--lenTest >= 2); - lenTest = lenTestTemp; - - if (repIndex == 0) - startLen = lenTest + 1; + while (--len2 >= 2); + } + + if (repIndex == 0) startLen = len + 1; // 17.old + // startLen = len + 1; // 18.new /* if (_maxMode) */ { - SizeT lenTest2 = lenTest + 1; - SizeT limit = lenTest2 + p->numFastBytes; - SizeT nextRepMatchPrice; + // ---------- REP : LIT : REP_0 ---------- + // numFastBytes + 1 + numFastBytes + + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; if (limit > numAvailFull) limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) + + len2 += 2; + if (len2 <= limit) + if (data[len2 - 2] == data2[len2 - 2]) + if (data[len2 - 1] == data2[len2 - 1]) + { + unsigned state2 = kRepNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + price += GET_PRICE_LEN(&p->repLenEnc, posState, len) + + GET_PRICE_0(p->isMatch[state2][posState2]) + + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterRep; + posState2 = (posState2 + 1) & p->pbMask; + + + price += GetPrice_Rep_0(p, state2, posState2); + + for (; len2 < limit && data[len2] == data2[len2]; len2++) + {} + + len2 -= len; + // if (len2 >= 3) { - SizeT state2 = kRepNextStates[state]; - SizeT posStateNext = (position + lenTest) & p->pbMask; - SizeT curAndLenCharPrice = - price + p->repLenEnc.prices[posState][lenTest - 2] + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (position + lenTest + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ { - SizeT curAndLenPrice; - COptimal *opt; - SizeT offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + unsigned offset = cur + len + len2; + + if (last < offset) + last = offset; + // do { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = repIndex; + UInt32 price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->extra = (CExtra)(len + 1); + opt->dist = (UInt32)repIndex; + } } + // while (len2 >= 3); } } + } } } } - /* for (SizeT lenTest = 2; lenTest <= newLen; lenTest++) */ + + + // ---------- MATCH ---------- + /* for (unsigned len = 2; len <= newLen; len++) */ if (newLen > numAvail) { newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); - matches[numPairs] = newLen; + for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2); + MATCHES[numPairs] = (UInt32)newLen; numPairs += 2; } + + // startLen = 2; /* speed optimization */ + if (newLen >= startLen) { - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - while (lenEnd < cur + newLen) - p->opt[++lenEnd].price = kInfinityPrice; + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + UInt32 dist; + unsigned offs, posSlot, len; + + { + unsigned offset = cur + newLen; + if (last < offset) + last = offset; + } offs = 0; - while (startLen > matches[offs]) + while (startLen > MATCHES[offs]) offs += 2; - curBack = matches[offs + 1]; - GetPosSlot2(curBack, posSlot); - for (lenTest = /*2*/ startLen; ; lenTest++) + dist = MATCHES[(size_t)offs + 1]; + + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); + + for (len = /*2*/ startLen; ; len++) { - SizeT curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; - SizeT lenToPosState = GetLenToPosState(lenTest); - COptimal *opt; - if (curBack < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; - else - curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; - - opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) + UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = curBack + LZMA_NUM_REPS; - opt->prev1IsChar = False; + COptimal *opt; + unsigned lenNorm = len - 2; + lenNorm = GetLenToPosState2(lenNorm); + if (dist < kNumFullDistances) + price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)]; + else + price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask]; + + opt = &p->opt[cur + len]; + if (price < opt->price) + { + opt->price = price; + opt->len = (UInt32)len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } } - if (/*_maxMode && */lenTest == matches[offs]) + if (len == MATCHES[offs]) { - /* Try Match + Literal + Rep0 */ - const Byte *data2 = data - (curBack + (SizeT)1); - SizeT lenTest2 = lenTest + 1; - SizeT limit = lenTest2 + p->numFastBytes; - SizeT nextRepMatchPrice; + // if (p->_maxMode) { + // MATCH : LIT : REP_0 + + const Byte *data2 = data - dist - 1; + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; if (limit > numAvailFull) limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) + + len2 += 2; + if (len2 <= limit) + if (data[len2 - 2] == data2[len2 - 2]) + if (data[len2 - 1] == data2[len2 - 1]) { - SizeT state2 = kMatchNextStates[state]; - SizeT posStateNext = (position + lenTest) & p->pbMask; - SizeT curAndLenCharPrice = curAndLenPrice + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (posStateNext + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ + for (; len2 < limit && data[len2] == data2[len2]; len2++) + {} + + len2 -= len; + + // if (len2 >= 3) + { + unsigned state2 = kMatchNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + unsigned offset; + price += GET_PRICE_0(p->isMatch[state2][posState2]); + price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterMatch; + + posState2 = (posState2 + 1) & p->pbMask; + price += GetPrice_Rep_0(p, state2, posState2); + + offset = cur + len + len2; + + if (last < offset) + last = offset; + // do { - SizeT offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + UInt32 price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + // offset--; + if (price2 < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = curBack + LZMA_NUM_REPS; + opt->price = price2; + opt->len = (UInt32)len2; + opt->extra = (CExtra)(len + 1); + opt->dist = dist + LZMA_NUM_REPS; } } + // while (len2 >= 3); + } + } + offs += 2; if (offs == numPairs) break; - curBack = matches[offs + 1]; - if (curBack >= kNumFullDistances) - GetPosSlot2(curBack, posSlot); + dist = MATCHES[(size_t)offs + 1]; + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); } } } } + + do + p->opt[last].price = kInfinityPrice; + while (--last); + + return Backward(p, cur); } + + #define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) -static SizeT GetOptimumFast(CLzmaEnc *p, SizeT *backRes) + + +static unsigned GetOptimumFast(CLzmaEnc *p) { - SizeT mainDist, mainLen, repLen, numPairs; - SizeT numAvail, repIndex, i; + UInt32 numAvail, mainDist; + unsigned mainLen, numPairs, repIndex, repLen, i; const Byte *data; - const SizeT *matches; if (p->additionalOffset == 0) mainLen = ReadMatchDistances(p, &numPairs); else { - mainLen = p->longestMatchLength; + mainLen = p->longestMatchLen; numPairs = p->numPairs; } numAvail = p->numAvail; - *backRes = (SizeT)-1; + p->backRes = MARK_LIT; if (numAvail < 2) return 1; + // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) { - SizeT len; - const Byte *data2 = data - (p->reps[i] + (SizeT)1); + unsigned len; + const Byte *data2 = data - p->reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) continue; - for (len = 2; len < numAvail && data[len] == data2[len]; len++); + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} if (len >= p->numFastBytes) { - *backRes = i; - MovePos(p, len - 1); + p->backRes = (UInt32)i; + MOVE_POS(p, len - 1) return len; } if (len > repLen) @@ -1526,98 +2026,182 @@ static SizeT GetOptimumFast(CLzmaEnc *p, SizeT *backRes) } } - matches = p->matches; if (mainLen >= p->numFastBytes) { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); + p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) return mainLen; } mainDist = 0; /* for GCC */ + if (mainLen >= 2) { - mainDist = matches[numPairs - 1]; - while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) + mainDist = p->matches[(size_t)numPairs - 1]; + while (numPairs > 2) { - if (!ChangePair(matches[numPairs - 3], mainDist)) + UInt32 dist2; + if (mainLen != p->matches[(size_t)numPairs - 4] + 1) + break; + dist2 = p->matches[(size_t)numPairs - 3]; + if (!ChangePair(dist2, mainDist)) break; numPairs -= 2; - mainLen = matches[numPairs - 2]; - mainDist = matches[numPairs - 1]; + mainLen--; + mainDist = dist2; } if (mainLen == 2 && mainDist >= 0x80) mainLen = 1; } - if (repLen >= 2 && ( - (repLen + 1 >= mainLen) || - (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || - (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) + if (repLen >= 2) + if ( repLen + 1 >= mainLen + || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) + || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) { - *backRes = repIndex; - MovePos(p, repLen - 1); + p->backRes = (UInt32)repIndex; + MOVE_POS(p, repLen - 1) return repLen; } - + if (mainLen < 2 || numAvail <= 2) return 1; - p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); - if (p->longestMatchLength >= 2) { - SizeT newDistance = matches[p->numPairs - 1]; - if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || - (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || - (p->longestMatchLength > mainLen + 1) || - (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) - return 1; + unsigned len1 = ReadMatchDistances(p, &p->numPairs); + p->longestMatchLen = len1; + + if (len1 >= 2) + { + UInt32 newDist = p->matches[(size_t)p->numPairs - 1]; + if ( (len1 >= mainLen && newDist < mainDist) + || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist)) + || (len1 > mainLen + 1) + || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) + return 1; + } } - + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + for (i = 0; i < LZMA_NUM_REPS; i++) { - SizeT len, limit; - const Byte *data2 = data - (p->reps[i] + (SizeT)1); + unsigned len, limit; + const Byte *data2 = data - p->reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) continue; limit = mainLen - 1; - for (len = 2; len < limit && data[len] == data2[len]; len++); - if (len >= limit) - return 1; + for (len = 2;; len++) + { + if (len >= limit) + return 1; + if (data[len] != data2[len]) + break; + } + } + + p->backRes = mainDist + LZMA_NUM_REPS; + if (mainLen != 2) + { + MOVE_POS(p, mainLen - 2) } - *backRes = mainDist + LZMA_NUM_REPS; - MovePos(p, mainLen - 2); return mainLen; } -static void WriteEndMarker(CLzmaEnc *p, SizeT posState) + + + +static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { - SizeT len; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + UInt32 range; + range = p->rc.range; + { + UInt32 ttt, newBound; + CLzmaProb *prob = &p->isMatch[p->state][posState]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_1(&p->rc, prob) + prob = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_0(&p->rc, prob) + } p->state = kMatchNextStates[p->state]; - len = LZMA_MATCH_LEN_MIN; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - RcTree_Encode(&p->rc, p->posSlotEncoder[len - LZMA_MATCH_LEN_MIN], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); - RangeEnc_EncodeDirectBits(&p->rc, (((SizeT)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + + p->rc.range = range; + LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); + range = p->rc.range; + + { + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1); + CLzmaProb *probs = p->posSlotEncoder[0]; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < (1 << kNumPosSlotBits)); + } + { + // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range; + unsigned numBits = 30 - kNumAlignBits; + do + { + range >>= 1; + p->rc.low += range; + RC_NORM(&p->rc) + } + while (--numBits); + } + + { + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + CLzmaProb *probs = p->posAlignEncoder; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < kAlignTableSize); + } + p->rc.range = range; } + static SRes CheckErrors(CLzmaEnc *p) { if (p->result != SZ_OK) return p->result; if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) + + #ifndef _7ZIP_ST + if ( + // p->mf_Failure || + (p->mtMode && + ( // p->matchFinderMt.failure_LZ_LZ || + p->matchFinderMt.failure_LZ_BT)) + ) + { + p->result = MY_HRES_ERROR__INTERNAL_ERROR; + // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); + } + #endif + + if (MFB.result != SZ_OK) p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) p->finished = True; return p->result; } -static SRes Flush(CLzmaEnc *p, SizeT nowPos) + +MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; @@ -1628,55 +2212,135 @@ static SRes Flush(CLzmaEnc *p, SizeT nowPos) return CheckErrors(p); } -static void FillAlignPrices(CLzmaEnc *p) + +MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) { - SizeT i; - for (i = 0; i < kAlignTableSize; i++) - p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); - p->alignPriceCount = 0; + unsigned i; + const CProbPrice *ProbPrices = p->ProbPrices; + const CLzmaProb *probs = p->posAlignEncoder; + // p->alignPriceCount = 0; + for (i = 0; i < kAlignTableSize / 2; i++) + { + UInt32 price = 0; + unsigned sym = i; + unsigned m = 1; + unsigned bit; + UInt32 prob; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + prob = probs[m]; + p->alignPrices[i ] = price + GET_PRICEa_0(prob); + p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); + // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + } } -static void FillDistancesPrices(CLzmaEnc *p) + +MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) { - SizeT tempPrices[kNumFullDistances]; - SizeT i, lenToPosState; - for (i = kStartPosModelIndex; i < kNumFullDistances; i++) - { - SizeT posSlot = GetPosSlot1(i); - SizeT footerBits = ((posSlot >> 1) - 1); - SizeT base = ((2 | (posSlot & 1)) << footerBits); - tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); + // int y; for (y = 0; y < 100; y++) { + + UInt32 tempPrices[kNumFullDistances]; + unsigned i, lps; + + const CProbPrice *ProbPrices = p->ProbPrices; + p->matchPriceCount = 0; + + for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) + { + unsigned posSlot = GetPosSlot1(i); + unsigned footerBits = (posSlot >> 1) - 1; + unsigned base = ((2 | (posSlot & 1)) << footerBits); + const CLzmaProb *probs = p->posEncoders + (size_t)base * 2; + // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); + UInt32 price = 0; + unsigned m = 1; + unsigned sym = i; + unsigned offset = (unsigned)1 << footerBits; + base += i; + + if (footerBits) + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) + bit; + } + while (--footerBits); + + { + unsigned prob = probs[m]; + tempPrices[base ] = price + GET_PRICEa_0(prob); + tempPrices[base + offset] = price + GET_PRICEa_1(prob); + } } - for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) + for (lps = 0; lps < kNumLenToPosStates; lps++) { - SizeT posSlot; - const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; - SizeT *posSlotPrices = p->posSlotPrices[lenToPosState]; - for (posSlot = 0; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); - for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + unsigned slot; + unsigned distTableSize2 = (p->distTableSize + 1) >> 1; + UInt32 *posSlotPrices = p->posSlotPrices[lps]; + const CLzmaProb *probs = p->posSlotEncoder[lps]; + + for (slot = 0; slot < distTableSize2; slot++) + { + // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices); + UInt32 price; + unsigned bit; + unsigned sym = slot + (1 << (kNumPosSlotBits - 1)); + unsigned prob; + bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))]; + posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob); + posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob); + } + + { + UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++) + { + posSlotPrices[(size_t)slot * 2 ] += delta; + posSlotPrices[(size_t)slot * 2 + 1] += delta; + delta += ((UInt32)1 << kNumBitPriceShiftBits); + } + } { - SizeT *distancesPrices = p->distancesPrices[lenToPosState]; - for (i = 0; i < kStartPosModelIndex; i++) - distancesPrices[i] = posSlotPrices[i]; - for (; i < kNumFullDistances; i++) - distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + UInt32 *dp = p->distancesPrices[lps]; + + dp[0] = posSlotPrices[0]; + dp[1] = posSlotPrices[1]; + dp[2] = posSlotPrices[2]; + dp[3] = posSlotPrices[3]; + + for (i = 4; i < kNumFullDistances; i += 2) + { + UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; + dp[i ] = slotPrice + tempPrices[i]; + dp[i + 1] = slotPrice + tempPrices[i + 1]; + } } } - p->matchPriceCount = 0; + // } } -void LzmaEnc_Construct(CLzmaEnc *p) + + +static void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); -#ifndef _7ZIP_ST + MatchFinder_Construct(&MFB); + + #ifndef _7ZIP_ST + p->matchFinderMt.MatchFinder = &MFB; MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; -#endif + #endif { CLzmaEncProps props; @@ -1684,53 +2348,62 @@ void LzmaEnc_Construct(CLzmaEnc *p) LzmaEnc_SetProps(p, &props); } -#ifndef LZMA_LOG_BSR + #ifndef LZMA_LOG_BSR LzmaEnc_FastPosInit(p->g_FastPos); -#endif + #endif LzmaEnc_InitPriceTables(p->ProbPrices); - p->litProbs = 0; - p->saveState.litProbs = 0; + p->litProbs = NULL; + p->saveState.litProbs = NULL; } -CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc) +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) { void *p; - p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); - if (p != 0) + p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); + if (p) LzmaEnc_Construct((CLzmaEnc *)p); return p; } -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc) +static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) { - alloc->Free(alloc, p->litProbs); - alloc->Free(alloc, p->saveState.litProbs); - p->litProbs = 0; - p->saveState.litProbs = 0; + ISzAlloc_Free(alloc, p->litProbs); + ISzAlloc_Free(alloc, p->saveState.litProbs); + p->litProbs = NULL; + p->saveState.litProbs = NULL; } -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) +static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { -#ifndef _7ZIP_ST + #ifndef _7ZIP_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); -#endif - MatchFinder_Free(&p->matchFinderBase, allocBig); + #endif + + MatchFinder_Free(&MFB, allocBig); LzmaEnc_FreeLits(p, alloc); RangeEnc_Free(&p->rc, alloc); } -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig) +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); - alloc->Free(alloc, p); + ISzAlloc_Free(alloc, p); } -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, SizeT maxPackSize, SizeT maxUnpackSize) + +MY_NO_INLINE +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { - SizeT nowPos32, startPos32; + UInt32 nowPos32, startPos32; if (p->needInit) { + #ifndef _7ZIP_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + } + #endif p->matchFinder.Init(p->matchFinderObj); p->needInit = 0; } @@ -1739,178 +2412,310 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, SizeT maxPackSize, return p->result; RINOK(CheckErrors(p)); - nowPos32 = (SizeT)p->nowPos64; + nowPos32 = (UInt32)p->nowPos64; startPos32 = nowPos32; if (p->nowPos64 == 0) { - SizeT numPairs; + unsigned numPairs; Byte curByte; if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) return Flush(p, nowPos32); ReadMatchDistances(p, &numPairs); - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); - p->state = kLiteralNextStates[p->state]; - curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); + RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); + // p->state = kLiteralNextStates[p->state]; + curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); LitEnc_Encode(&p->rc, p->litProbs, curByte); p->additionalOffset--; nowPos32++; } if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) - for (;;) + + for (;;) + { + UInt32 dist; + unsigned len, posState; + UInt32 range, ttt, newBound; + CLzmaProb *probs; + + if (p->fastMode) + len = GetOptimumFast(p); + else { - SizeT pos, len; - SizeT posState; - - if (p->fastMode) - len = GetOptimumFast(p, &pos); + unsigned oci = p->optCur; + if (p->optEnd == oci) + len = GetOptimum(p, nowPos32); else - len = GetOptimum(p, nowPos32, &pos); - -#ifdef SHOW_STAT2 - printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); -#endif - - posState = nowPos32 & p->pbMask; - if (len == 1 && pos == (SizeT)-1) { - Byte curByte; - CLzmaProb *probs; - const Byte *data; - - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - curByte = *data; - probs = LIT_PROBS(nowPos32, *(data - 1)); - if (IsCharState(p->state)) - LitEnc_Encode(&p->rc, probs, curByte); - else - LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); - p->state = kLiteralNextStates[p->state]; + const COptimal *opt = &p->opt[oci]; + len = opt->len; + p->backRes = opt->dist; + p->optCur = oci + 1; } + } + + posState = (unsigned)nowPos32 & p->pbMask; + range = p->rc.range; + probs = &p->isMatch[p->state][posState]; + + RC_BIT_PRE(&p->rc, probs) + + dist = p->backRes; + + #ifdef SHOW_STAT2 + printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); + #endif + + if (dist == MARK_LIT) + { + Byte curByte; + const Byte *data; + unsigned state; + + RC_BIT_0(&p->rc, probs); + p->rc.range = range; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; + probs = LIT_PROBS(nowPos32, *(data - 1)); + curByte = *data; + state = p->state; + p->state = kLiteralNextStates[state]; + if (IsLitState(state)) + LitEnc_Encode(&p->rc, probs, curByte); else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); + } + else + { + RC_BIT_1(&p->rc, probs); + probs = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, probs) + + if (dist < LZMA_NUM_REPS) { - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - if (pos < LZMA_NUM_REPS) + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG0[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 0) { - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); - if (pos == 0) + RC_BIT_0(&p->rc, probs); + probs = &p->isRep0Long[p->state][posState]; + RC_BIT_PRE(&p->rc, probs) + if (len != 1) { - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); - RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); + RC_BIT_1_BASE(&p->rc, probs); } else { - SizeT distance = p->reps[pos]; - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); - if (pos == 1) - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); - else - { - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); - if (pos == 3) - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - } - p->reps[1] = p->reps[0]; - p->reps[0] = distance; - } - if (len == 1) + RC_BIT_0_BASE(&p->rc, probs); p->state = kShortRepNextStates[p->state]; - else - { - LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - p->state = kRepNextStates[p->state]; } } else { - SizeT i; - SizeT posSlot; - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - pos -= LZMA_NUM_REPS; - GetPosSlot(pos, posSlot); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); - - if (posSlot >= kStartPosModelIndex) + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG1[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 1) { - SizeT footerBits = ((posSlot >> 1) - 1); - SizeT base = ((2 | (posSlot & 1)) << footerBits); - SizeT posReduced = pos - base; - - if (posSlot < kEndPosModelIndex) - RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[1]; + } + else + { + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG2[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 2) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[2]; + } else { - RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - p->alignPriceCount++; + RC_BIT_1_BASE(&p->rc, probs); + dist = p->reps[3]; + p->reps[3] = p->reps[2]; } + p->reps[2] = p->reps[1]; } - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; p->reps[1] = p->reps[0]; - p->reps[0] = pos; - p->matchPriceCount++; + p->reps[0] = dist; + } + + RC_NORM(&p->rc) + + p->rc.range = range; + + if (len != 1) + { + LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + --p->repLenEncCounter; + p->state = kRepNextStates[p->state]; } } - p->additionalOffset -= len; - nowPos32 += len; - if (p->additionalOffset == 0) + else { - SizeT processed; - if (!p->fastMode) + unsigned posSlot; + RC_BIT_0(&p->rc, probs); + p->rc.range = range; + p->state = kMatchNextStates[p->state]; + + LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + // --p->lenEnc.counter; + + dist -= LZMA_NUM_REPS; + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = dist + 1; + + p->matchPriceCount++; + GetPosSlot(dist, posSlot); + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); { - if (p->matchPriceCount >= (1 << 7)) - FillDistancesPrices(p); - if (p->alignPriceCount >= kAlignTableSize) - FillAlignPrices(p); + UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); + range = p->rc.range; + probs = p->posSlotEncoder[GetLenToPosState(len)]; + do + { + CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); + UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; + sym <<= 1; + RC_BIT(&p->rc, prob, bit); + } + while (sym < (1 << kNumPosSlotBits * 2)); + p->rc.range = range; } - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - break; - processed = nowPos32 - startPos32; - if (useLimits) + + if (dist >= kStartPosModelIndex) { - if (processed + kNumOpts + 300 >= maxUnpackSize || - RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) - break; + unsigned footerBits = ((posSlot >> 1) - 1); + + if (dist < kNumFullDistances) + { + unsigned base = ((2 | (posSlot & 1)) << footerBits); + RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */)); + } + else + { + UInt32 pos2 = (dist | 0xF) << (32 - footerBits); + range = p->rc.range; + // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + /* + do + { + range >>= 1; + p->rc.low += range & (0 - ((dist >> --footerBits) & 1)); + RC_NORM(&p->rc) + } + while (footerBits > kNumAlignBits); + */ + do + { + range >>= 1; + p->rc.low += range & (0 - (pos2 >> 31)); + pos2 += pos2; + RC_NORM(&p->rc) + } + while (pos2 != 0xF0000000); + + + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + + { + unsigned m = 1; + unsigned bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + p->rc.range = range; + // p->alignPriceCount++; + } + } + } + } + } + + nowPos32 += (UInt32)len; + p->additionalOffset -= len; + + if (p->additionalOffset == 0) + { + UInt32 processed; + + if (!p->fastMode) + { + /* + if (p->alignPriceCount >= 16) // kAlignTableSize + FillAlignPrices(p); + if (p->matchPriceCount >= 128) + FillDistancesPrices(p); + if (p->lenEnc.counter <= 0) + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + */ + if (p->matchPriceCount >= 64) + { + FillAlignPrices(p); + // { int y; for (y = 0; y < 100; y++) { + FillDistancesPrices(p); + // }} + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); } - else if (processed >= (1 << 15)) + if (p->repLenEncCounter <= 0) { - p->nowPos64 += nowPos32 - startPos32; - return CheckErrors(p); + p->repLenEncCounter = REP_LEN_COUNT; + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; + + if (maxPackSize) + { + if (processed + kNumOpts + 300 >= maxUnpackSize + || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) + break; + } + else if (processed >= (1 << 17)) + { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } } + } + p->nowPos64 += nowPos32 - startPos32; return Flush(p, nowPos32); } -#define kBigHashDicLimit ((SizeT)1 << 24) -static SRes LzmaEnc_Alloc(CLzmaEnc *p, SizeT keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - SizeT beforeSize = kNumOpts; - Bool btMode; + UInt32 beforeSize = kNumOpts; + UInt32 dictSize; + if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; - btMode = (p->matchFinderBase.btMode != 0); -#ifndef _7ZIP_ST - p->mtMode = (p->multiThread && !p->fastMode && btMode); -#endif + + #ifndef _7ZIP_ST + p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); + #endif { - ptrdiff_t lclp = p->lc + p->lp; - if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) + unsigned lclp = p->lc + p->lp; + if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, ((SizeT)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, ((SizeT)0x300 << lclp) * sizeof(CLzmaProb)); - if (p->litProbs == 0 || p->saveState.litProbs == 0) + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + if (!p->litProbs || !p->saveState.litProbs) { LzmaEnc_FreeLits(p, alloc); return SZ_ERROR_MEM; @@ -1919,42 +2724,72 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, SizeT keepWindowSize, ISzAlloc *alloc, IS } } - p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); + MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; -#ifndef _7ZIP_ST + dictSize = p->dictSize; + if (dictSize == ((UInt32)2 << 30) || + dictSize == ((UInt32)3 << 30)) + { + /* 21.03 : here we reduce the dictionary for 2 reasons: + 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary. + 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases, + where data size is aligned for 1 GB: 5/6/8 GB. + That reducing must be >= 1 for such corner cases. */ + dictSize -= 1; + } + + if (beforeSize + dictSize < keepWindowSize) + beforeSize = keepWindowSize - dictSize; + + /* in worst case we can look ahead for + max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes. + we send larger value for (keepAfter) to MantchFinder_Create(): + (numFastBytes + LZMA_MATCH_LEN_MAX + 1) + */ + + #ifndef _7ZIP_ST if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); + RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ + , allocBig)); p->matchFinderObj = &p->matchFinderMt; + MFB.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else -#endif + #endif { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + if (!MatchFinder_Create(&MFB, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */ + , allocBig)) return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + p->matchFinderObj = &MFB; + MatchFinder_CreateVTable(&MFB, &p->matchFinder); } + return SZ_OK; } -void LzmaEnc_Init(CLzmaEnc *p) +static void LzmaEnc_Init(CLzmaEnc *p) { - SizeT i; + unsigned i; p->state = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->reps[i] = 0; + p->reps[0] = + p->reps[1] = + p->reps[2] = + p->reps[3] = 1; RangeEnc_Init(&p->rc); + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; for (i = 0; i < kNumStates; i++) { - SizeT j; + unsigned j; for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { p->isMatch[i][j] = kProbInitValue; @@ -1966,41 +2801,50 @@ void LzmaEnc_Init(CLzmaEnc *p) p->isRepG2[i] = kProbInitValue; } - { - SizeT num = 0x300LL << (p->lp + p->lc); - for (i = 0; i < num; i++) - p->litProbs[i] = kProbInitValue; - } - { for (i = 0; i < kNumLenToPosStates; i++) { CLzmaProb *probs = p->posSlotEncoder[i]; - SizeT j; + unsigned j; for (j = 0; j < (1 << kNumPosSlotBits); j++) probs[j] = kProbInitValue; } } { - for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + for (i = 0; i < kNumFullDistances; i++) p->posEncoders[i] = kProbInitValue; } - LenEnc_Init(&p->lenEnc.p); - LenEnc_Init(&p->repLenEnc.p); + { + UInt32 num = (UInt32)0x300 << (p->lp + p->lc); + UInt32 k; + CLzmaProb *probs = p->litProbs; + for (k = 0; k < num; k++) + probs[k] = kProbInitValue; + } - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - p->optimumEndIndex = 0; - p->optimumCurrentIndex = 0; + LenEnc_Init(&p->lenProbs); + LenEnc_Init(&p->repLenProbs); + + p->optEnd = 0; + p->optCur = 0; + + { + for (i = 0; i < kNumOpts; i++) + p->opt[i].price = kInfinityPrice; + } + p->additionalOffset = 0; - p->pbMask = (1LL << p->pb) - 1LL; - p->lpMask = (1LL << p->lp) - 1LL; + p->pbMask = ((unsigned)1 << p->pb) - 1; + p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); + + // p->mf_Failure = False; } -void LzmaEnc_InitPrices(CLzmaEnc *p) + +static void LzmaEnc_InitPrices(CLzmaEnc *p) { if (!p->fastMode) { @@ -2009,17 +2853,20 @@ void LzmaEnc_InitPrices(CLzmaEnc *p) } p->lenEnc.tableSize = - p->repLenEnc.tableSize = - p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - LenPriceEnc_UpdateTables(&p->lenEnc, 1LL << p->pb, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1LL << p->pb, p->ProbPrices); + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + + p->repLenEncCounter = REP_LEN_COUNT; + + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } -static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, SizeT keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) +static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - SizeT i; - for (i = 0; i < (SizeT)kDicLogSizeMaxCompress; i++) - if (p->dictSize <= ((SizeT)1 << i)) + unsigned i; + for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) + if (p->dictSize <= ((UInt32)1 << i)) break; p->distTableSize = i * 2; @@ -2033,81 +2880,88 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, SizeT keepWindowSize, ISzAlloc *al } static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, - ISzAlloc *alloc, ISzAlloc *allocBig) + ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; + MFB.stream = inStream; p->needInit = 1; p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); } SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream *inStream, SizeT keepWindowSize, - ISzAlloc *alloc, ISzAlloc *allocBig) + ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; + MFB.stream = inStream; p->needInit = 1; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) { - p->matchFinderBase.directInput = 1; - p->matchFinderBase.bufferBase = (Byte *)src; - p->matchFinderBase.directInputRem = srcLen; + MFB.directInput = 1; + MFB.bufferBase = (Byte *)src; + MFB.directInputRem = srcLen; } SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - SizeT keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; LzmaEnc_SetInputBuf(p, src, srcLen); p->needInit = 1; + LzmaEnc_SetDataSize(pp, srcLen); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } void LzmaEnc_Finish(CLzmaEncHandle pp) { -#ifndef _7ZIP_ST + #ifndef _7ZIP_ST CLzmaEnc *p = (CLzmaEnc *)pp; if (p->mtMode) MatchFinderMt_ReleaseStream(&p->matchFinderMt); -#else - //pp = pp; -#endif + #else + UNUSED_VAR(pp); + #endif } + typedef struct { - ISeqOutStream funcTable; + ISeqOutStream vt; Byte *data; SizeT rem; - Bool overflow; -} CSeqOutStreamBuf; + BoolInt overflow; +} CLzmaEnc_SeqOutStreamBuf; -static size_t MyWrite(void *pp, const void *data, size_t size) +static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) { - CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp; + CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); if (p->rem < size) { size = p->rem; p->overflow = True; } - memcpy(p->data, data, size); - p->rem -= size; - p->data += size; + if (size != 0) + { + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + } return size; } -SizeT LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +/* +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { const CLzmaEnc *p = (CLzmaEnc *)pp; return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } +*/ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { @@ -2115,15 +2969,17 @@ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; } -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, - Byte *dest, size_t *destLen, SizeT desiredPackSize, SizeT *unpackSize) + +// (desiredPackSize == 0) is not allowed +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) { CLzmaEnc *p = (CLzmaEnc *)pp; UInt64 nowPos64; SRes res; - CSeqOutStreamBuf outStream; + CLzmaEnc_SeqOutStreamBuf outStream; - outStream.funcTable.Write = MyWrite; + outStream.vt.Write = SeqOutStreamBuf_Write; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; @@ -2135,13 +2991,13 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, if (reInit) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); - nowPos64 = p->nowPos64; RangeEnc_Init(&p->rc); - p->rc.outStream = &outStream.funcTable; //-V506 - - res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); - - *unpackSize = (SizeT)(p->nowPos64 - nowPos64); + p->rc.outStream = &outStream.vt; + nowPos64 = p->nowPos64; + + res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); + + *unpackSize = (UInt32)(p->nowPos64 - nowPos64); *destLen -= outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; @@ -2149,25 +3005,26 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, return res; } + +MY_NO_INLINE static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) { SRes res = SZ_OK; -#ifndef _7ZIP_ST + #ifndef _7ZIP_ST Byte allocaDummy[0x300]; - int i = 0; - for (i = 0; i < 16; i++) - allocaDummy[i] = (Byte)i; -#endif + allocaDummy[0] = 0; + allocaDummy[1] = allocaDummy[0]; + #endif for (;;) { - res = LzmaEnc_CodeOneBlock(p, False, 0, 0); - if (res != SZ_OK || p->finished != 0) + res = LzmaEnc_CodeOneBlock(p, 0, 0); + if (res != SZ_OK || p->finished) break; - if (progress != 0) + if (progress) { - res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); if (res != SZ_OK) { res = SZ_ERROR_PROGRESS; @@ -2175,67 +3032,93 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } } } + LzmaEnc_Finish(p); + + /* + if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) + res = SZ_ERROR_FAIL; + } + */ + return res; } + SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, - ISzAlloc *alloc, ISzAlloc *allocBig) + ISzAllocPtr alloc, ISzAllocPtr allocBig) { RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); } + SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) { - CLzmaEnc *p = (CLzmaEnc *)pp; - SizeT i; - SizeT dictSize = p->dictSize; if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - for (i = 11; i <= 30; i++) { - if (dictSize <= (SizeT)(2LL << i)) + const CLzmaEnc *p = (const CLzmaEnc *)pp; + const UInt32 dictSize = p->dictSize; + UInt32 v; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + // we write aligned dictionary value to properties for lzma decoder + if (dictSize >= ((UInt32)1 << 21)) { - dictSize = (2LL << i); - break; + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; + v = (dictSize + kDictMask) & ~kDictMask; + if (v < dictSize) + v = dictSize; } - if (dictSize <= (SizeT)(3LL << i)) + else { - dictSize = (3LL << i); - break; + unsigned i = 11 * 2; + do + { + v = (UInt32)(2 + (i & 1)) << (i >> 1); + i++; + } + while (v < dictSize); } + + SetUi32(props + 1, v); + return SZ_OK; } +} - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; + +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +{ + return (unsigned)((CLzmaEnc *)pp)->writeEndMark; } + SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - ptrdiff_t writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { SRes res; CLzmaEnc *p = (CLzmaEnc *)pp; - CSeqOutStreamBuf outStream; - - LzmaEnc_SetInputBuf(p, src, srcLen); + CLzmaEnc_SeqOutStreamBuf outStream; - outStream.funcTable.Write = MyWrite; + outStream.vt.Write = SeqOutStreamBuf_Write; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; - p->writeEndMark = writeEndMark & 0x1; + p->writeEndMark = writeEndMark; + p->rc.outStream = &outStream.vt; - p->rc.outStream = &outStream.funcTable; //-V506 res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); + if (res == SZ_OK) + { res = LzmaEnc_Encode2(p, progress); + if (res == SZ_OK && p->nowPos64 != srcLen) + res = SZ_ERROR_FAIL; + } *destLen -= outStream.rem; if (outStream.overflow) @@ -2243,13 +3126,14 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte return res; } + SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, ptrdiff_t writeEndMark, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); SRes res; - if (p == 0) + if (!p) return SZ_ERROR_MEM; res = LzmaEnc_SetProps(p, props); @@ -2258,9 +3142,21 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); if (res == SZ_OK) res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, - writeEndMark, progress, alloc, allocBig); + writeEndMark, progress, alloc, allocBig); } LzmaEnc_Destroy(p, alloc, allocBig); return res; } + + +/* +#ifndef _7ZIP_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + lz_threads[0] = p->matchFinderMt.hashSync.thread; + lz_threads[1] = p->matchFinderMt.btSync.thread; +} +#endif +*/ diff --git a/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.h b/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.h index 3caa840..bc2ed50 100644 --- a/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.h +++ b/uefi_firmware/compression/LZMA/SDK/C/LzmaEnc.h @@ -1,80 +1,78 @@ /* LzmaEnc.h -- LZMA Encoder -2009-02-07 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZMA_ENC_H #define __LZMA_ENC_H -#include "Types.h" +#include "7zTypes.h" -#ifdef __cplusplus -extern "C" { -#endif +EXTERN_C_BEGIN #define LZMA_PROPS_SIZE 5 typedef struct _CLzmaEncProps { - int level; /* 0 <= level <= 9 */ - SizeT dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (1 << 30) for 64-bit version - default = (1 << 24) */ - ptrdiff_t lc; /* 0 <= lc <= 8, default = 3 */ - ptrdiff_t lp; /* 0 <= lp <= 4, default = 0 */ - ptrdiff_t pb; /* 0 <= pb <= 4, default = 2 */ - ptrdiff_t algo; /* 0 - fast, 1 - normal, default = 1 */ - ptrdiff_t fb; /* 5 <= fb <= 273, default = 32 */ - ptrdiff_t btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ - ptrdiff_t numHashBytes; /* 2, 3 or 4, default = 4 */ - SizeT mc; /* 1 <= mc <= (1 << 30), default = 32 */ - SizeT writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ - ptrdiff_t numThreads; /* 1 or 2, default = 2 */ + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (3 << 29) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ + + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. + Encoder uses this value to reduce dictionary size */ + + UInt64 affinity; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); void LzmaEncProps_Normalize(CLzmaEncProps *p); -SizeT LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); /* ---------- CLzmaEncHandle Interface ---------- */ -/* LzmaEnc_* functions can return the following exit codes: -Returns: +/* LzmaEnc* functions can return the following exit codes: +SRes: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - Write callback error. + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ typedef void * CLzmaEncHandle; -CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig); +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); + SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); + SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - ptrdiff_t writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); -/* ---------- One Call Interface ---------- */ -/* LzmaEncode -Return code: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater - SZ_ERROR_OUTPUT_EOF - output buffer overflow - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ +/* ---------- One Call Interface ---------- */ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, ptrdiff_t writeEndMark, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); -#ifdef __cplusplus -} -#endif +EXTERN_C_END #endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/Precomp.h b/uefi_firmware/compression/LZMA/SDK/C/Precomp.h new file mode 100644 index 0000000..fc663b6 --- /dev/null +++ b/uefi_firmware/compression/LZMA/SDK/C/Precomp.h @@ -0,0 +1,12 @@ +/* Precomp.h -- StdAfx +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_PRECOMP_H +#define __7Z_PRECOMP_H + +#include "Compiler.h" +/* #include "7zTypes.h" */ + +#define _7ZIP_ST + +#endif diff --git a/uefi_firmware/compression/LZMA/SDK/C/Types.h b/uefi_firmware/compression/LZMA/SDK/C/Types.h deleted file mode 100644 index 72497ec..0000000 --- a/uefi_firmware/compression/LZMA/SDK/C/Types.h +++ /dev/null @@ -1,268 +0,0 @@ -/* Types.h -- Basic types -2010-10-09 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -//#include "../../UefiLzma.h" -#include "BaseTypes.h" - -#ifdef _WIN32 -#undef _WIN32 -#endif - -#ifdef _WIN64 -#undef _WIN64 -#endif - -#define _LZMA_SIZE_OPT -#define _7ZIP_ST - -#include - -#ifdef _WIN32 -#include -#endif - -#ifndef EXTERN_C_BEGIN -#ifdef __cplusplus -#define EXTERN_C_BEGIN extern "C" { -#define EXTERN_C_END } -#else -#define EXTERN_C_BEGIN -#define EXTERN_C_END -#endif -#endif - -EXTERN_C_BEGIN - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - -#ifdef _WIN32 -typedef DWORD WRes; -#else -typedef int WRes; -#endif - -#ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n ## ULL -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - -typedef int Bool; -#define True 1 -#define False 0 - - -#ifdef _WIN32 -#define MY_STD_CALL __stdcall -#else -#define MY_STD_CALL -#endif - -#ifdef _MSC_VER - -#if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall - -#else - -#define MY_CDECL -#define MY_FAST_CALL - -#endif - - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct -{ - Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */ -} IByteIn; - -typedef struct -{ - void (*Write)(void *p, Byte b); -} IByteOut; - -typedef struct -{ - SRes (*Read)(void *p, void *buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) < input(*size)) is allowed */ -} ISeqInStream; - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf); - -typedef struct -{ - size_t (*Write)(void *p, const void *buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -} ISeqOutStream; - -typedef enum -{ - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - -typedef struct -{ - SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); -} ISeekInStream; - -typedef struct -{ - SRes (*Look)(void *p, const void **buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(void *p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(void *p, void *buf, size_t *size); - /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); -} ILookInStream; - -SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); - -#define LookToRead_BUF_SIZE (1 << 14) - -typedef struct -{ - ILookInStream s; - ISeekInStream *realStream; - size_t pos; - size_t size; - Byte buf[LookToRead_BUF_SIZE]; -} CLookToRead; - -void LookToRead_CreateVTable(CLookToRead *p, int lookahead); -void LookToRead_Init(CLookToRead *p); - -typedef struct -{ - ISeqInStream s; - ILookInStream *realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook *p); - -typedef struct -{ - ISeqInStream s; - ILookInStream *realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead *p); - -typedef struct -{ - SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. */ -} ICompressProgress; - -typedef struct -{ - void *(*Alloc)(void *p, size_t size); - void (*Free)(void *p, void *address); /* address can be 0 */ -} ISzAlloc; - -#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) -#define IAlloc_Free(p, a) (p)->Free((p), a) - -#ifdef _WIN32 - -#define CHAR_PATH_SEPARATOR '\\' -#define WCHAR_PATH_SEPARATOR L'\\' -#define STRING_PATH_SEPARATOR "\\" -#define WSTRING_PATH_SEPARATOR L"\\" - -#else - -#define CHAR_PATH_SEPARATOR '/' -#define WCHAR_PATH_SEPARATOR L'/' -#define STRING_PATH_SEPARATOR "/" -#define WSTRING_PATH_SEPARATOR L"/" - -#endif - -EXTERN_C_END - -#endif diff --git a/uefi_firmware/compression/LZMA/UefiLzma.h b/uefi_firmware/compression/LZMA/UefiLzma.h new file mode 100644 index 0000000..fdd3834 --- /dev/null +++ b/uefi_firmware/compression/LZMA/UefiLzma.h @@ -0,0 +1,31 @@ +/* LZMA UEFI header file + + Copyright (c) 2009, Intel Corporation. All rights reserved. + This program and the accompanying materials + are licensed and made available under the terms and conditions of the BSD License + which accompanies this distribution. The full text of the license may be found at + http://opensource.org/licenses/bsd-license.php + + THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, + WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. + +*/ + +#ifndef __UEFILZMA_H__ +#define __UEFILZMA_H__ + +#include "BaseTypes.h" + +#ifdef _WIN32 +#undef _WIN32 +#endif + +#ifdef _WIN64 +#undef _WIN64 +#endif + +#define _LZMA_SIZE_OPT +#define _7ZIP_ST + +#endif // __UEFILZMA_H__ + From 01ef63fdc3ce57b6cfbff5cf283d8ad14b458586 Mon Sep 17 00:00:00 2001 From: yeggor Date: Thu, 1 Feb 2024 19:43:30 +0000 Subject: [PATCH 2/2] minor fix --- uefi_firmware/uefi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/uefi_firmware/uefi.py b/uefi_firmware/uefi.py index 48e1518..2605cd0 100644 --- a/uefi_firmware/uefi.py +++ b/uefi_firmware/uefi.py @@ -850,9 +850,13 @@ def process(self): if raw_object: self.parsed_object = RawObject(self.data) status = True + + # If we failed to unpack/parse one of the GuidDefinedSections + # we can skip it and continue parsing the rest of the objects elif isinstance(self.parsed_object, GuidDefinedSection): self.parsed_object = None return True + return status def build(self, generate_checksum=False, debug=False): @@ -895,9 +899,9 @@ def showinfo(self, ts='', index=-1): guid_name = get_guid_name(guid) offset = offset + 16 if guid_name is not None: - print ("%s PUSH %s (%s)" % (ts, guid_name, sguid(guid))) + print ("%s PUSH %s (%s)" % (ts, guid_name, sguid(guid))) else: - print ("%s PUSH %s" % (ts, sguid(guid))) + print ("%s PUSH %s" % (ts, sguid(guid))) elif opcode == 0x03: print ("%s AND" % (ts)) elif opcode == 0x04: