1
0
mirror of https://github.com/veracrypt/VeraCrypt.git synced 2025-11-11 11:08:02 -06:00

Dll125 lzma (#1120)

* Update LZMA to latest

* Add missing file
This commit is contained in:
DLL125
2023-06-23 21:19:50 +02:00
committed by GitHub
parent 9e1c0e5b22
commit 097cfa947e
24 changed files with 2155 additions and 1180 deletions

View File

@@ -1,8 +1,8 @@
/* 7zTypes.h -- Basic types /* 7zTypes.h -- Basic types
2021-12-25 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H #ifndef ZIP7_7Z_TYPES_H
#define __7Z_TYPES_H #define ZIP7_7Z_TYPES_H
#ifdef _WIN32 #ifdef _WIN32
/* #include <windows.h> */ /* #include <windows.h> */
@@ -52,6 +52,11 @@ typedef int SRes;
#define MY_ALIGN(n) #define MY_ALIGN(n)
#endif #endif
#else #else
/*
// C11/C++11:
#include <stdalign.h>
#define MY_ALIGN(n) alignas(n)
*/
#define MY_ALIGN(n) __attribute__ ((aligned(n))) #define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif #endif
@@ -62,7 +67,7 @@ typedef int SRes;
typedef unsigned WRes; typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) // #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32 #else // _WIN32
@@ -70,13 +75,13 @@ typedef unsigned WRes;
typedef int WRes; typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800 #define MY_FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7 #define MY_FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_ERRNO #define MY_FACILITY_WRes MY_FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \ ( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \ | (MY_FACILITY_WRes << 16) \
| (HRESULT)0x80000000 )) | (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \ #define MY_SRes_HRESULT_FROM_WRes(x) \
@@ -120,23 +125,19 @@ typedef int WRes;
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) #define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) #define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32), // if (MY_FACILITY_WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors: // we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L) #define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) #define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/* /*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) #define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) #define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) #define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/ */
// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
#define TEXT(quote) quote #define TEXT(quote) quote
#define FILE_ATTRIBUTE_READONLY 0x0001 #define FILE_ATTRIBUTE_READONLY 0x0001
@@ -160,18 +161,18 @@ typedef unsigned long UINT_PTR;
#ifndef RINOK #ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
#endif #endif
#ifndef RINOK_WRes #ifndef RINOK_WRes
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } #define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
#endif #endif
typedef unsigned char Byte; typedef unsigned char Byte;
typedef short Int16; typedef short Int16;
typedef unsigned short UInt16; typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG #ifdef Z7_DECL_Int32_AS_long
typedef long Int32; typedef long Int32;
typedef unsigned long UInt32; typedef unsigned long UInt32;
#else #else
@@ -210,37 +211,51 @@ typedef size_t SIZE_T;
#endif // _WIN32 #endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) #define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64 #ifdef Z7_DECL_Int64_AS_long
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64; typedef long Int64;
typedef unsigned long UInt64; typedef unsigned long UInt64;
#else #else
#if defined(_MSC_VER) || defined(__BORLANDC__) #if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
typedef __int64 Int64; typedef __int64 Int64;
typedef unsigned __int64 UInt64; typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n #else
#if defined(__clang__) || defined(__GNUC__)
#include <stdint.h>
typedef int64_t Int64;
typedef uint64_t UInt64;
#else #else
typedef long long int Int64; typedef long long int Int64;
typedef unsigned long long int UInt64; typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL // #define UINT64_CONST(n) n ## ULL
#endif
#endif #endif
#endif #endif
#ifdef _LZMA_NO_SYSTEM_SIZE_T #define UINT64_CONST(n) n
typedef UInt32 SizeT;
#ifdef Z7_DECL_SizeT_AS_unsigned_int
typedef unsigned int SizeT;
#else #else
typedef size_t SizeT; typedef size_t SizeT;
#endif #endif
/*
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
typedef size_t MY_uintptr_t;
#else
#include <stdint.h>
typedef uintptr_t MY_uintptr_t;
#endif
*/
typedef int BoolInt; typedef int BoolInt;
/* typedef BoolInt Bool; */ /* typedef BoolInt Bool; */
#define True 1 #define True 1
@@ -248,23 +263,23 @@ typedef int BoolInt;
#ifdef _WIN32 #ifdef _WIN32
#define MY_STD_CALL __stdcall #define Z7_STDCALL __stdcall
#else #else
#define MY_STD_CALL #define Z7_STDCALL
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
#if _MSC_VER >= 1300 #if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline) #define Z7_NO_INLINE __declspec(noinline)
#else #else
#define MY_NO_INLINE #define Z7_NO_INLINE
#endif #endif
#define MY_FORCE_INLINE __forceinline #define Z7_FORCE_INLINE __forceinline
#define MY_CDECL __cdecl #define Z7_CDECL __cdecl
#define MY_FAST_CALL __fastcall #define Z7_FASTCALL __fastcall
#else // _MSC_VER #else // _MSC_VER
@@ -272,27 +287,25 @@ typedef int BoolInt;
|| (defined(__clang__) && (__clang_major__ >= 4)) \ || (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \ || defined(__INTEL_COMPILER) \
|| defined(__xlC__) || defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline)) #define Z7_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline #define Z7_FORCE_INLINE __attribute__((always_inline)) inline
#else #else
#define MY_NO_INLINE #define Z7_NO_INLINE
#define Z7_FORCE_INLINE
#endif #endif
#define MY_FORCE_INLINE #define Z7_CDECL
#define MY_CDECL
#if defined(_M_IX86) \ #if defined(_M_IX86) \
|| defined(__i386__) || defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall)) // #define Z7_FASTCALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl)) // #define Z7_FASTCALL __attribute__((cdecl))
#define MY_FAST_CALL #define Z7_FASTCALL
#elif defined(MY_CPU_AMD64) #elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi)) // #define Z7_FASTCALL __attribute__((ms_abi))
#define MY_FAST_CALL #define Z7_FASTCALL
#else #else
#define MY_FAST_CALL #define Z7_FASTCALL
#endif #endif
#endif // _MSC_VER #endif // _MSC_VER
@@ -300,41 +313,49 @@ typedef int BoolInt;
/* The following interfaces use first parameter as pointer to structure */ /* The following interfaces use first parameter as pointer to structure */
typedef struct IByteIn IByteIn; // #define Z7_C_IFACE_CONST_QUAL
struct IByteIn #define Z7_C_IFACE_CONST_QUAL const
#define Z7_C_IFACE_DECL(a) \
struct a ## _; \
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
typedef struct a ## _ a; \
struct a ## _
Z7_C_IFACE_DECL (IByteIn)
{ {
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
}; };
#define IByteIn_Read(p) (p)->Read(p) #define IByteIn_Read(p) (p)->Read(p)
typedef struct IByteOut IByteOut; Z7_C_IFACE_DECL (IByteOut)
struct IByteOut
{ {
void (*Write)(const IByteOut *p, Byte b); void (*Write)(IByteOutPtr p, Byte b);
}; };
#define IByteOut_Write(p, b) (p)->Write(p, b) #define IByteOut_Write(p, b) (p)->Write(p, b)
typedef struct ISeqInStream ISeqInStream; Z7_C_IFACE_DECL (ISeqInStream)
struct ISeqInStream
{ {
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */ (output(*size) < input(*size)) is allowed */
}; };
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* try to read as much as avail in stream and limited by (*processedSize) */
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
/* it can return SZ_ERROR_INPUT_EOF */ /* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); // SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); // SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
typedef struct ISeqOutStream ISeqOutStream; Z7_C_IFACE_DECL (ISeqOutStream)
struct ISeqOutStream
{ {
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes. /* Returns: result - the number of actually written bytes.
(result < size) means error */ (result < size) means error */
}; };
@@ -348,29 +369,26 @@ typedef enum
} ESzSeek; } ESzSeek;
typedef struct ISeekInStream ISeekInStream; Z7_C_IFACE_DECL (ISeekInStream)
struct ISeekInStream
{ {
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
}; };
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
typedef struct ILookInStream ILookInStream; Z7_C_IFACE_DECL (ILookInStream)
struct ILookInStream
{ {
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed (output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */ (output(*size) < input(*size)) is allowed */
SRes (*Skip)(const ILookInStream *p, size_t offset); SRes (*Skip)(ILookInStreamPtr p, size_t offset);
/* offset must be <= output(*size) of Look */ /* offset must be <= output(*size) of Look */
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */ /* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
}; };
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
@@ -379,19 +397,18 @@ struct ILookInStream
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
/* reads via ILookInStream::Read */ /* reads via ILookInStream::Read */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
typedef struct typedef struct
{ {
ILookInStream vt; ILookInStream vt;
const ISeekInStream *realStream; ISeekInStreamPtr realStream;
size_t pos; size_t pos;
size_t size; /* it's data size */ size_t size; /* it's data size */
@@ -403,13 +420,13 @@ typedef struct
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } #define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
const ILookInStream *realStream; ILookInStreamPtr realStream;
} CSecToLook; } CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p); void SecToLook_CreateVTable(CSecToLook *p);
@@ -419,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
const ILookInStream *realStream; ILookInStreamPtr realStream;
} CSecToRead; } CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p); void SecToRead_CreateVTable(CSecToRead *p);
typedef struct ICompressProgress ICompressProgress; Z7_C_IFACE_DECL (ICompressProgress)
struct ICompressProgress
{ {
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break. /* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */ Value (UInt64)(Int64)-1 for size means unknown value. */
}; };
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
@@ -470,13 +486,13 @@ struct ISzAlloc
#ifndef MY_container_of #ifndef Z7_container_of
/* /*
#define MY_container_of(ptr, type, m) container_of(ptr, type, m) #define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) #define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) #define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) #define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/ */
/* /*
@@ -485,24 +501,64 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members" GCC 4.8.1 : classes with non-public variable members"
*/ */
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) #define Z7_container_of(ptr, type, m) \
((type *)(void *)((char *)(void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
#define Z7_container_of_CONST(ptr, type, m) \
((const type *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
/*
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
((type *)(void *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
*/
#endif #endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) #define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/* /*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/ */
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) #if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#endif
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
/* Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
*/ Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) // #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifndef Z7_ARRAY_SIZE
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
#ifdef _WIN32 #ifdef _WIN32
@@ -520,6 +576,22 @@ struct ISzAlloc
#endif #endif
#define k_PropVar_TimePrec_0 0
#define k_PropVar_TimePrec_Unix 1
#define k_PropVar_TimePrec_DOS 2
#define k_PropVar_TimePrec_HighPrec 3
#define k_PropVar_TimePrec_Base 16
#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
EXTERN_C_END EXTERN_C_END
#endif #endif
/*
#ifndef Z7_ST
#ifdef _7ZIP_ST
#define Z7_ST
#endif
#endif
*/

101
src/Common/lzma/7zWindows.h Normal file
View File

@@ -0,0 +1,101 @@
/* 7zWindows.h -- StdAfx
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_7Z_WINDOWS_H
#define ZIP7_INC_7Z_WINDOWS_H
#ifdef _WIN32
#if defined(__clang__)
# pragma clang diagnostic push
#endif
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#if _MSC_VER == 1900
// for old kit10 versions
// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
#endif
// win10 Windows Kit:
#endif // _MSC_VER
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
// for msvc6 without sdk2003
#define RPC_NO_WINDOWS_H
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
// #if defined(__GNUC__) && !defined(__clang__)
#include <windows.h>
#else
#include <Windows.h>
#endif
// #include <basetsd.h>
// #include <wtypes.h>
// but if precompiled with clang-cl then we need
// #include <windows.h>
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
#ifndef _W64
typedef long LONG_PTR, *PLONG_PTR;
typedef unsigned long ULONG_PTR, *PULONG_PTR;
typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
#define Z7_OLD_WIN_SDK
#endif // _W64
#endif // _MSC_VER == 1200
#ifdef Z7_OLD_WIN_SDK
#ifndef INVALID_FILE_ATTRIBUTES
#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
#endif
#ifndef INVALID_SET_FILE_POINTER
#define INVALID_SET_FILE_POINTER ((DWORD)-1)
#endif
#ifndef FILE_SPECIAL_ACCESS
#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
#endif
// ShlObj.h:
// #define BIF_NEWDIALOGSTYLE 0x0040
#pragma warning(disable : 4201)
// #pragma warning(disable : 4115)
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif
#endif // Z7_OLD_WIN_SDK
#ifdef UNDER_CE
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif
#if defined(_MSC_VER)
#if _MSC_VER >= 1400 && _MSC_VER <= 1600
// BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
// string.h
// #pragma warning(disable : 4514)
#endif
#endif
/* #include "7zTypes.h" */
#endif

View File

@@ -1,38 +1,54 @@
/* Alloc.c -- Memory allocation functions /* Alloc.c -- Memory allocation functions
2021-07-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
#endif #endif
#include <stdlib.h> #include <stdlib.h>
#include "Alloc.h" #include "Alloc.h"
/* #define _SZ_ALLOC_DEBUG */ #ifdef _WIN32
#ifdef Z7_LARGE_PAGES
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#endif
#endif // Z7_LARGE_PAGES
#endif // _WIN32
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ // #define SZ_ALLOC_DEBUG
#ifdef _SZ_ALLOC_DEBUG /* #define SZ_ALLOC_DEBUG */
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef SZ_ALLOC_DEBUG
#include <string.h>
#include <stdio.h> #include <stdio.h>
int g_allocCount = 0; static int g_allocCount = 0;
int g_allocCountMid = 0; #ifdef _WIN32
int g_allocCountBig = 0; static int g_allocCountMid = 0;
static int g_allocCountBig = 0;
#endif
#define CONVERT_INT_TO_STR(charType, tempSize) \ #define CONVERT_INT_TO_STR(charType, tempSize) \
unsigned char temp[tempSize]; unsigned i = 0; \ char temp[tempSize]; unsigned i = 0; \
while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
*s++ = (charType)('0' + (unsigned)val); \ *s++ = (charType)('0' + (unsigned)val); \
while (i != 0) { i--; *s++ = temp[i]; } \ while (i != 0) { i--; *s++ = temp[i]; } \
*s = 0; *s = 0;
static void ConvertUInt64ToString(UInt64 val, char *s) static void ConvertUInt64ToString(UInt64 val, char *s)
{ {
CONVERT_INT_TO_STR(char, 24); CONVERT_INT_TO_STR(char, 24)
} }
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
@@ -77,7 +93,7 @@ static void PrintAligned(const char *s, size_t align)
Print(s); Print(s);
} }
static void PrintLn() static void PrintLn(void)
{ {
Print("\n"); Print("\n");
} }
@@ -89,10 +105,10 @@ static void PrintHex(UInt64 v, size_t align)
PrintAligned(s, align); PrintAligned(s, align);
} }
static void PrintDec(UInt64 v, size_t align) static void PrintDec(int v, size_t align)
{ {
char s[32]; char s[32];
ConvertUInt64ToString(v, s); ConvertUInt64ToString((unsigned)v, s);
PrintAligned(s, align); PrintAligned(s, align);
} }
@@ -102,12 +118,19 @@ static void PrintAddr(void *p)
} }
#define PRINT_ALLOC(name, cnt, size, ptr) \ #define PRINT_REALLOC(name, cnt, size, ptr) { \
Print(name " "); \
if (!ptr) PrintDec(cnt++, 10); \
PrintHex(size, 10); \
PrintAddr(ptr); \
PrintLn(); }
#define PRINT_ALLOC(name, cnt, size, ptr) { \
Print(name " "); \ Print(name " "); \
PrintDec(cnt++, 10); \ PrintDec(cnt++, 10); \
PrintHex(size, 10); \ PrintHex(size, 10); \
PrintAddr(ptr); \ PrintAddr(ptr); \
PrintLn(); PrintLn(); }
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \ #define PRINT_FREE(name, cnt, ptr) if (ptr) { \
Print(name " "); \ Print(name " "); \
@@ -117,7 +140,9 @@ static void PrintAddr(void *p)
#else #else
#ifdef _WIN32
#define PRINT_ALLOC(name, cnt, size, ptr) #define PRINT_ALLOC(name, cnt, size, ptr)
#endif
#define PRINT_FREE(name, cnt, ptr) #define PRINT_FREE(name, cnt, ptr)
#define Print(s) #define Print(s)
#define PrintLn() #define PrintLn()
@@ -127,16 +152,31 @@ static void PrintAddr(void *p)
#endif #endif
/*
by specification:
malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free()
realloc(NULL, size) : the call is equivalent to malloc(size)
realloc(non_NULL, 0) : the call is equivalent to free(ptr)
in main compilers:
malloc(0) : returns non_NULL
realloc(NULL, 0) : returns non_NULL
realloc(non_NULL, 0) : returns NULL
*/
void *MyAlloc(size_t size) void *MyAlloc(size_t size)
{ {
if (size == 0) if (size == 0)
return NULL; return NULL;
PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
{ {
void *p = malloc(size); void *p = malloc(size);
// PRINT_ALLOC("Alloc ", g_allocCount, size, p); if (p)
{
PRINT_ALLOC("Alloc ", g_allocCount, size, p)
}
return p; return p;
} }
#else #else
@@ -146,33 +186,64 @@ void *MyAlloc(size_t size)
void MyFree(void *address) void MyFree(void *address)
{ {
PRINT_FREE("Free ", g_allocCount, address); PRINT_FREE("Free ", g_allocCount, address)
free(address); free(address);
} }
void *MyRealloc(void *address, size_t size)
{
if (size == 0)
{
MyFree(address);
return NULL;
}
// PRINT_REALLOC("Realloc ", g_allocCount, size, address)
#ifdef SZ_ALLOC_DEBUG
{
void *p = realloc(address, size);
if (p)
{
PRINT_REALLOC("Realloc ", g_allocCount, size, address)
}
return p;
}
#else
return realloc(address, size);
#endif
}
#ifdef _WIN32 #ifdef _WIN32
void *MidAlloc(size_t size) void *MidAlloc(size_t size)
{ {
if (size == 0) if (size == 0)
return NULL; return NULL;
#ifdef SZ_ALLOC_DEBUG
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); {
void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
if (p)
{
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
}
return p;
}
#else
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
#endif
} }
void MidFree(void *address) void MidFree(void *address)
{ {
PRINT_FREE("Free-Mid", g_allocCountMid, address); PRINT_FREE("Free-Mid", g_allocCountMid, address)
if (!address) if (!address)
return; return;
VirtualFree(address, 0, MEM_RELEASE); VirtualFree(address, 0, MEM_RELEASE);
} }
#ifdef _7ZIP_LARGE_PAGES #ifdef Z7_LARGE_PAGES
#ifdef MEM_LARGE_PAGES #ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
@@ -183,34 +254,35 @@ void MidFree(void *address)
extern extern
SIZE_T g_LargePageSize; SIZE_T g_LargePageSize;
SIZE_T g_LargePageSize = 0; SIZE_T g_LargePageSize = 0;
typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
#endif // _7ZIP_LARGE_PAGES void SetLargePageSize(void)
void SetLargePageSize()
{ {
#ifdef _7ZIP_LARGE_PAGES #ifdef Z7_LARGE_PAGES
SIZE_T size; SIZE_T size;
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) const
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); Func_GetLargePageMinimum fn =
if (!largePageMinimum) (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetLargePageMinimum");
if (!fn)
return; return;
size = largePageMinimum(); size = fn();
if (size == 0 || (size & (size - 1)) != 0) if (size == 0 || (size & (size - 1)) != 0)
return; return;
g_LargePageSize = size; g_LargePageSize = size;
#endif #endif
} }
#endif // Z7_LARGE_PAGES
void *BigAlloc(size_t size) void *BigAlloc(size_t size)
{ {
if (size == 0) if (size == 0)
return NULL; return NULL;
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
#ifdef _7ZIP_LARGE_PAGES #ifdef Z7_LARGE_PAGES
{ {
SIZE_T ps = g_LargePageSize; SIZE_T ps = g_LargePageSize;
if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
@@ -220,38 +292,38 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps; size2 = (size + ps) & ~ps;
if (size2 >= size) if (size2 >= size)
{ {
void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (res) if (p)
return res; {
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
return p;
}
} }
} }
} }
#endif #endif
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); return MidAlloc(size);
} }
void BigFree(void *address) void BigFree(void *address)
{ {
PRINT_FREE("Free-Big", g_allocCountBig, address); PRINT_FREE("Free-Big", g_allocCountBig, address)
MidFree(address);
if (!address)
return;
VirtualFree(address, 0, MEM_RELEASE);
} }
#endif #endif // _WIN32
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree }; const ISzAlloc g_Alloc = { SzAlloc, SzFree };
#ifdef _WIN32 #ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif #endif
@@ -334,7 +406,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
void *p; void *p;
void *pAligned; void *pAligned;
size_t newSize; size_t newSize;
UNUSED_VAR(pp); UNUSED_VAR(pp)
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */ block to prevent cache line sharing with another allocated blocks */
@@ -362,7 +434,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
#else #else
void *p; void *p;
UNUSED_VAR(pp); UNUSED_VAR(pp)
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
return NULL; return NULL;
@@ -377,7 +449,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
static void SzAlignedFree(ISzAllocPtr pp, void *address) static void SzAlignedFree(ISzAllocPtr pp, void *address)
{ {
UNUSED_VAR(pp); UNUSED_VAR(pp)
#ifndef USE_posix_memalign #ifndef USE_posix_memalign
if (address) if (address)
MyFree(((void **)address)[-1]); MyFree(((void **)address)[-1]);
@@ -401,7 +473,7 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{ {
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
void *adr; void *adr;
void *pAligned; void *pAligned;
size_t newSize; size_t newSize;
@@ -447,7 +519,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{ {
if (address) if (address)
{ {
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
PrintLn(); PrintLn();
Print("- Aligned Free: "); Print("- Aligned Free: ");
PrintLn(); PrintLn();

View File

@@ -1,19 +1,32 @@
/* Alloc.h -- Memory allocation functions /* Alloc.h -- Memory allocation functions
2021-07-13 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H #ifndef ZIP7_INC_ALLOC_H
#define __COMMON_ALLOC_H #define ZIP7_INC_ALLOC_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/*
MyFree(NULL) : is allowed, as free(NULL)
MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
MyRealloc() is similar to realloc() for the following cases:
MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
*/
void *MyAlloc(size_t size); void *MyAlloc(size_t size);
void MyFree(void *address); void MyFree(void *address);
void *MyRealloc(void *address, size_t size);
#ifdef _WIN32 #ifdef _WIN32
#ifdef Z7_LARGE_PAGES
void SetLargePageSize(void); void SetLargePageSize(void);
#endif
void *MidAlloc(size_t size); void *MidAlloc(size_t size);
void MidFree(void *address); void MidFree(void *address);

View File

@@ -1,12 +1,37 @@
/* Compiler.h /* Compiler.h : Compiler specific defines and pragmas
2021-01-05 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H #ifndef ZIP7_INC_COMPILER_H
#define __7Z_COMPILER_H #define ZIP7_INC_COMPILER_H
#if defined(__clang__)
# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
#if defined(__clang__) && defined(__apple_build_version__)
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__clang__)
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__GNUC__)
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
#ifdef _MSC_VER
#if !defined(__clang__) && !defined(__GNUC__)
#define Z7_MSC_VER_ORIGINAL _MSC_VER
#endif
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
#define Z7_MINGW
#endif
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded"
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#ifdef _MSC_VER #ifdef _MSC_VER
@@ -17,24 +42,115 @@
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif #endif
#if _MSC_VER >= 1300 #if defined(_MSC_VER) && _MSC_VER >= 1800
#pragma warning(disable : 4996) // This function or variable may be unsafe #pragma warning(disable : 4464) // relative include path contains '..'
#else #endif
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#ifdef __clang__ // == 1200 : -O1 : for __forceinline
#pragma clang diagnostic ignored "-Wdeprecated-declarations" // >= 1900 : -O1 : for printf
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" #pragma warning(disable : 4710) // function not inlined
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#if _MSC_VER < 1900
// winnt.h: 'Int64ShllMod32'
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#endif
#if _MSC_VER < 1300
// #pragma warning(disable : 4702) // unreachable code
// Bra.c : -O1:
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#endif
/*
#if _MSC_VER > 1400 && _MSC_VER <= 1900
// strcat: This function or variable may be unsafe
// sysinfoapi.h: kit10: GetVersion was declared deprecated
#pragma warning(disable : 4996)
#endif
*/
#if _MSC_VER > 1200
// -Wall warnings
#pragma warning(disable : 4711) // function selected for automatic inline expansion
#pragma warning(disable : 4820) // '2' bytes padding added after data member
#if _MSC_VER >= 1400 && _MSC_VER < 1920
// 1400: string.h: _DBG_MEMCPY_INLINE_
// 1600 - 191x : smmintrin.h __cplusplus'
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#pragma warning(disable : 4668)
// 1400 - 1600 : WinDef.h : 'FARPROC' :
// 1900 - 191x : immintrin.h: _readfsbase_u32
// no function prototype given : converting '()' to '(void)'
#pragma warning(disable : 4255)
#endif
#if _MSC_VER >= 1914
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
#pragma warning(disable : 5045)
#endif
#endif // _MSC_VER > 1200
#endif // _MSC_VER
#if defined(__clang__) && (__clang_major__ >= 4)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5)
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("loop( no_vector )")
#define Z7_ATTRIB_NO_VECTORIZE
#else
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#define Z7_ATTRIB_NO_VECTORIZE
#endif
#if defined(MY_CPU_X86_OR_AMD64) && ( \
defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5))
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
#else
#define Z7_ATTRIB_NO_SSE
#endif
#define Z7_ATTRIB_NO_VECTOR \
Z7_ATTRIB_NO_VECTORIZE \
Z7_ATTRIB_NO_SSE
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// GCC is not good for __builtin_expect()
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_unlikely [[unlikely]]
// #define Z7_likely [[likely]]
#else
#define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000))
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif #endif
#define UNUSED_VAR(x) (void)x; #define UNUSED_VAR(x) (void)x;

View File

@@ -1,187 +1,318 @@
/* CpuArch.c -- CPU specific code /* CpuArch.c -- CPU specific code
2021-07-13 : Igor Pavlov : Public domain */ 2023-05-18 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
// #include <stdio.h>
#include "CpuArch.h" #include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) #undef NEED_CHECK_FOR_CPUID
#define USE_ASM #if !defined(MY_CPU_AMD64)
#define NEED_CHECK_FOR_CPUID
#endif #endif
#if !defined(USE_ASM) && _MSC_VER >= 1500 /*
#include <intrin.h> cpuid instruction supports (subFunction) parameter in ECX,
that is used only with some specific (function) parameter values.
But we always use only (subFunction==0).
*/
/*
__cpuid(): MSVC and GCC/CLANG use same function/macro name
but parameters are different.
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
*/
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
|| defined(__clang__) /* && (__clang_major__ >= 10) */
/* there was some CLANG/GCC compilers that have issues with
rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
The history of __cpuid() changes in CLANG/GCC:
GCC:
2007: it preserved ebx for (__PIC__ && __i386__)
2013: it preserved rbx and ebx for __PIC__
2014: it doesn't preserves rbx and ebx anymore
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
CLANG:
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
Do we need __PIC__ test for CLANG or we must care about rbx even if
__PIC__ is not defined?
*/
#define ASM_LN "\n"
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "mov %%rbx, %q1" \
ASM_LN "cpuid" \
ASM_LN "xchg %%rbx, %q1" \
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
/* "=&r" selects free register. It can select even rbx, if that register is free.
"=&D" for (RDI) also works, but the code can be larger with "=&D"
"2"(0) means (subFunction = 0),
2 is (zero-based) index in the output constraint list "=c" (ECX). */
#elif defined(MY_CPU_X86) && defined(__PIC__) \
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "mov %%ebx, %k1" \
ASM_LN "cpuid" \
ASM_LN "xchg %%ebx, %k1" \
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
#else
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "cpuid" \
: "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
#endif #endif
#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
static UInt32 CheckFlag(UInt32 flag) void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{ {
#ifdef _MSC_VER x86_cpuid_MACRO(p, func)
__asm pushfd;
__asm pop EAX;
__asm mov EDX, EAX;
__asm xor EAX, flag;
__asm push EAX;
__asm popfd;
__asm pushfd;
__asm pop EAX;
__asm xor EAX, EDX;
__asm push EDX;
__asm popfd;
__asm and flag, EAX;
#else
__asm__ __volatile__ (
"pushf\n\t"
"pop %%EAX\n\t"
"movl %%EAX,%%EDX\n\t"
"xorl %0,%%EAX\n\t"
"push %%EAX\n\t"
"popf\n\t"
"pushf\n\t"
"pop %%EAX\n\t"
"xorl %%EDX,%%EAX\n\t"
"push %%EDX\n\t"
"popf\n\t"
"andl %%EAX, %0\n\t":
"=c" (flag) : "c" (flag) :
"%eax", "%edx");
#endif
return flag;
} }
#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
#define EFALGS_CPUID_BIT 21
UInt32 a;
__asm__ __volatile__ (
ASM_LN "pushf"
ASM_LN "pushf"
ASM_LN "pop %0"
// ASM_LN "movl %0, %1"
// ASM_LN "xorl $0x200000, %0"
ASM_LN "btc %1, %0"
ASM_LN "push %0"
ASM_LN "popf"
ASM_LN "pushf"
ASM_LN "pop %0"
ASM_LN "xorl (%%esp), %0"
ASM_LN "popf"
ASM_LN
: "=&r" (a) // "=a"
: "i" (EFALGS_CPUID_BIT)
);
if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
return 0;
#endif
{
UInt32 p[4];
x86_cpuid_MACRO(p, 0)
return p[0];
}
}
#undef ASM_LN
#elif !defined(_MSC_VER)
/*
// for gcc/clang and other: we can try to use __cpuid macro:
#include <cpuid.h>
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
__cpuid(func, p[0], p[1], p[2], p[3]);
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
return (UInt32)__get_cpuid_max(0, NULL);
}
*/
// for unsupported cpuid:
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
UNUSED_VAR(func)
p[0] = p[1] = p[2] = p[3] = 0;
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
return 0;
}
#else // _MSC_VER
#if !defined(MY_CPU_AMD64)
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
#define EFALGS_CPUID_BIT 21
__asm pushfd
__asm pushfd
/*
__asm pop eax
// __asm mov edx, eax
__asm btc eax, EFALGS_CPUID_BIT
__asm push eax
*/
__asm btc dword ptr [esp], EFALGS_CPUID_BIT
__asm popfd
__asm pushfd
__asm pop eax
// __asm xor eax, edx
__asm xor eax, [esp]
// __asm push edx
__asm popfd
__asm and eax, (1 shl EFALGS_CPUID_BIT)
__asm jz end_func
#endif
__asm push ebx
__asm xor eax, eax // func
__asm xor ecx, ecx // subFunction (optional) for (func == 0)
__asm cpuid
__asm pop ebx
#if defined(NEED_CHECK_FOR_CPUID)
end_func:
#endif
__asm ret 0
}
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
UNUSED_VAR(p)
UNUSED_VAR(func)
__asm push ebx
__asm push edi
__asm mov edi, ecx // p
__asm mov eax, edx // func
__asm xor ecx, ecx // subfunction (optional) for (func == 0)
__asm cpuid
__asm mov [edi ], eax
__asm mov [edi + 4], ebx
__asm mov [edi + 8], ecx
__asm mov [edi + 12], edx
__asm pop edi
__asm pop ebx
__asm ret 0
}
#else // MY_CPU_AMD64
#if _MSC_VER >= 1600
#include <intrin.h>
#define MY_cpuidex __cpuidex
#else
/*
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
We still can use __cpuid for low (func) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FASTCALL / NO_INLINE func,
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
*/
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
{
UNUSED_VAR(subFunction)
__cpuid(CPUInfo, func);
}
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
#endif // _MSC_VER >= 1600
#if !defined(MY_CPU_AMD64)
/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
so we disable inlining here */
Z7_NO_INLINE
#endif
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
MY_cpuidex((int *)p, (int)func, 0);
}
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
int a[4];
MY_cpuidex(a, 0, 0);
return a[0];
}
#endif // MY_CPU_AMD64
#endif // _MSC_VER
#if defined(NEED_CHECK_FOR_CPUID)
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
#else #else
#define CHECK_CPUID_IS_SUPPORTED #define CHECK_CPUID_IS_SUPPORTED
#endif #endif
#undef NEED_CHECK_FOR_CPUID
#ifndef USE_ASM
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define MY__cpuidex __cpuidex
#else
/*
__cpuid (function == 4) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) doesn't clear ECX
We still can use __cpuid for low (function) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FAST_CALL / NO_INLINE function,
So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/
static static
MY_NO_INLINE BoolInt x86cpuid_Func_1(UInt32 *p)
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
UNUSED_VAR(subFunction);
__cpuid(CPUInfo, function);
}
#define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
#pragma message("======== MY__cpuidex_HACK WAS USED ========")
#endif
#else
#define MY__cpuidex(info, func, func2) __cpuid(info, func)
#pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
#endif
#endif
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
#ifdef _MSC_VER
UInt32 a2, b2, c2, d2;
__asm xor EBX, EBX;
__asm xor ECX, ECX;
__asm xor EDX, EDX;
__asm mov EAX, function;
__asm cpuid;
__asm mov a2, EAX;
__asm mov b2, EBX;
__asm mov c2, ECX;
__asm mov d2, EDX;
*a = a2;
*b = b2;
*c = c2;
*d = d2;
#else
__asm__ __volatile__ (
#if defined(MY_CPU_AMD64) && defined(__PIC__)
"mov %%rbx, %%rdi;"
"cpuid;"
"xchg %%rbx, %%rdi;"
: "=a" (*a) ,
"=D" (*b) ,
#elif defined(MY_CPU_X86) && defined(__PIC__)
"mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*a) ,
"=D" (*b) ,
#else
"cpuid"
: "=a" (*a) ,
"=b" (*b) ,
#endif
"=c" (*c) ,
"=d" (*d)
: "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
MY__cpuidex(CPUInfo, (int)function, 0);
*a = (UInt32)CPUInfo[0];
*b = (UInt32)CPUInfo[1];
*c = (UInt32)CPUInfo[2];
*d = (UInt32)CPUInfo[3];
#endif
}
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
{ {
CHECK_CPUID_IS_SUPPORTED CHECK_CPUID_IS_SUPPORTED
MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); z7_x86_cpuid(p, 1);
MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
return True; return True;
} }
static const UInt32 kVendors[][3] = /*
static const UInt32 kVendors[][1] =
{ {
{ 0x756E6547, 0x49656E69, 0x6C65746E}, { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
{ 0x68747541, 0x69746E65, 0x444D4163}, { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
{ 0x746E6543, 0x48727561, 0x736C7561} { 0x746E6543 } // , 0x48727561, 0x736C7561 }
}; };
*/
/*
typedef struct
{
UInt32 maxFunc;
UInt32 vendor[3];
UInt32 ver;
UInt32 b;
UInt32 c;
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
int x86cpuid_GetFirm(const Cx86cpuid *p) int x86cpuid_GetFirm(const Cx86cpuid *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
{ {
const UInt32 *v = kVendors[i]; const UInt32 *v = kVendors[i];
if (v[0] == p->vendor[0] && if (v[0] == p->vendor[0]
v[1] == p->vendor[1] && // && v[1] == p->vendor[1]
v[2] == p->vendor[2]) // && v[2] == p->vendor[2]
)
return (int)i; return (int)i;
} }
return -1; return -1;
@@ -190,41 +321,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p)
BoolInt CPU_Is_InOrder() BoolInt CPU_Is_InOrder()
{ {
Cx86cpuid p; Cx86cpuid p;
int firm;
UInt32 family, model; UInt32 family, model;
if (!x86cpuid_CheckAndRead(&p)) if (!x86cpuid_CheckAndRead(&p))
return True; return True;
family = x86cpuid_GetFamily(p.ver); family = x86cpuid_ver_GetFamily(p.ver);
model = x86cpuid_GetModel(p.ver); model = x86cpuid_ver_GetModel(p.ver);
firm = x86cpuid_GetFirm(&p);
switch (firm) switch (x86cpuid_GetFirm(&p))
{ {
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
/* In-Order Atom CPU */ // In-Order Atom CPU
model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
|| model == 0x26 /* 45 nm, Z6xx */ || model == 0x26 // 45 nm, Z6xx
|| model == 0x27 /* 32 nm, Z2460 */ || model == 0x27 // 32 nm, Z2460
|| model == 0x35 /* 32 nm, Z2760 */ || model == 0x35 // 32 nm, Z2760
|| model == 0x36 /* 32 nm, N2xxx, D2xxx */ || model == 0x36 // 32 nm, N2xxx, D2xxx
))); )));
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
} }
return True; return False; // v23 : unknown processors are not In-Order
} }
*/
#ifdef _WIN32
#include "7zWindows.h"
#endif
#if !defined(MY_CPU_AMD64) && defined(_WIN32) #if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported() /* for legacy SSE ia32: there is no user-space cpu instruction to check
that OS supports SSE register storing/restoring on context switches.
So we need some OS-specific function to check that it's safe to use SSE registers.
*/
Z7_FORCE_INLINE
static BoolInt CPU_Sys_Is_SSE_Supported(void)
{ {
OSVERSIONINFO vi; #ifdef _MSC_VER
vi.dwOSVersionInfoSize = sizeof(vi); #pragma warning(push)
if (!GetVersionEx(&vi)) #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
return False; #endif
return (vi.dwMajorVersion >= 5); /* low byte is major version of Windows
We suppose that any Windows version since
Windows2000 (major == 5) supports SSE registers */
return (Byte)GetVersion() >= 5;
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
} }
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
#else #else
@@ -232,94 +377,300 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
#endif #endif
static UInt32 X86_CPUID_ECX_Get_Flags() #if !defined(MY_CPU_AMD64)
BoolInt CPU_IsSupported_CMOV(void)
{ {
Cx86cpuid p; UInt32 a[4];
CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0]))
if (!x86cpuid_CheckAndRead(&p))
return 0; return 0;
return p.c; return (a[3] >> 15) & 1;
} }
BoolInt CPU_IsSupported_AES() BoolInt CPU_IsSupported_SSE(void)
{ {
return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; UInt32 a[4];
}
BoolInt CPU_IsSupported_SSSE3()
{
return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41()
{
return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p)) if (!x86cpuid_Func_1(&a[0]))
return False; return 0;
return (a[3] >> 25) & 1;
}
if (p.maxFunc < 7) BoolInt CPU_IsSupported_SSE2(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 26) & 1;
}
#endif
static UInt32 x86cpuid_Func_1_ECX(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return a[2];
}
BoolInt CPU_IsSupported_AES(void)
{
return (x86cpuid_Func_1_ECX() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3(void)
{
return (x86cpuid_Func_1_ECX() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41(void)
{
return (x86cpuid_Func_1_ECX() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA(void)
{
CHECK_SYS_SSE_SUPPORT
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False; return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 7);
return (d[1] >> 29) & 1; return (d[1] >> 29) & 1;
} }
} }
// #include <stdio.h> /*
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
<immintrin.h> that we can use or check.
For any 32-bit x86 we can use asm code in MSVC,
but MSVC asm code is huge after compilation.
So _xgetbv() is better
#ifdef _WIN32 ICC: _xgetbv() intrinsic is available (in what version of ICC?)
#include <Windows.h> ICC defines (__GNUC___) and it supports gnu assembler
also ICC supports MASM style code with -use-msasm switch.
but ICC doesn't support __attribute__((__target__))
GCC/CLANG 9:
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
and we need __attribute__((__target__("xsave")).
But with __target__("xsave") the function will be not
inlined to function that has no __target__("xsave") attribute.
If we want _xgetbv() call inlining, then we should use asm version
instead of calling _xgetbv().
Note:intrinsic is broke before GCC 8.2:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
*/
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|| defined(__clang__) && (__clang_major__ >= 9)
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
#if defined(__INTEL_COMPILER)
#define ATTRIB_XGETBV
#elif defined(__GNUC__) || defined(__clang__)
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
#else
#define ATTRIB_XGETBV
#endif
#endif #endif
BoolInt CPU_IsSupported_AVX2() #if defined(ATTRIB_XGETBV)
{ #include <immintrin.h>
Cx86cpuid p; #endif
CHECK_SYS_SSE_SUPPORT
// XFEATURE_ENABLED_MASK/XCR0
#define MY_XCR_XFEATURE_ENABLED_MASK 0
#if defined(ATTRIB_XGETBV)
ATTRIB_XGETBV
#endif
static UInt64 x86_xgetbv_0(UInt32 num)
{
#if defined(ATTRIB_XGETBV)
{
return
#if (defined(_MSC_VER))
_xgetbv(num);
#else
__builtin_ia32_xgetbv(
#if !defined(__clang__)
(int)
#endif
num);
#endif
}
#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
UInt32 a, d;
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
__asm__
(
"xgetbv"
: "=a"(a), "=d"(d) : "c"(num) : "cc"
);
#else // is old gcc
__asm__
(
".byte 0x0f, 0x01, 0xd0" "\n\t"
: "=a"(a), "=d"(d) : "c"(num) : "cc"
);
#endif
return ((UInt64)d << 32) | a;
// return a;
#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
UInt32 a, d;
__asm {
push eax
push edx
push ecx
mov ecx, num;
// xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
_emit 0x0f
_emit 0x01
_emit 0xd0
mov a, eax
mov d, edx
pop ecx
pop edx
pop eax
}
return ((UInt64)d << 32) | a;
// return a;
#else // it's unknown compiler
// #error "Need xgetbv function"
UNUSED_VAR(num)
// for MSVC-X64 we could call external function from external file.
/* Actually we had checked OSXSAVE/AVX in cpuid before.
So it's expected that OS supports at least AVX and below. */
// if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
return
// (1 << 0) | // x87
(1 << 1) // SSE
| (1 << 2); // AVX
#endif
}
#ifdef _WIN32
/*
Windows versions do not know about new ISA extensions that
can be introduced. But we still can use new extensions,
even if Windows doesn't report about supporting them,
But we can use new extensions, only if Windows knows about new ISA extension
that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
So it's enough to check
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
instead of
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
*/
#define MY_PF_XSAVE_ENABLED 17
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
#endif
BoolInt CPU_IsSupported_AVX(void)
{
#ifdef _WIN32 #ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17 if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False; return False;
/* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
some latest Win10 revisions. But we need AVX in older Windows also.
So we don't use the following check: */
/*
if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
return False;
*/
#endif #endif
if (!x86cpuid_CheckAndRead(&p)) /*
OS must use new special XSAVE/XRSTOR instructions to save
AVX registers when it required for context switching.
At OS statring:
OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
Also OS sets bitmask in XCR0 register that defines what
registers will be processed by XSAVE instruction:
XCR0.SSE[bit 0] - x87 registers and state
XCR0.SSE[bit 1] - SSE registers and state
XCR0.AVX[bit 2] - AVX registers and state
CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
So we can read that bit in user-space.
XCR0 is available for reading in user-space by new XGETBV instruction.
*/
{
const UInt32 c = x86cpuid_Func_1_ECX();
if (0 == (1
& (c >> 28) // AVX instructions are supported by hardware
& (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
return False;
}
/* also we can check
CPUID.1:ECX.XSAVE [bit 26] : that shows that
XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
/* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
in most cases we expect that OS also will support storing/restoring
for AVX and SSE states at least.
But to be ensure for that we call user-space instruction
XGETBV(0) to get XCR0 value that contains bitmask that defines
what exact states(registers) OS have enabled for storing/restoring.
*/
{
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=%d\n", bm);
return 1
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
}
// since Win7SP1: we can use GetEnabledXStateFeatures();
}
BoolInt CPU_IsSupported_AVX2(void)
{
if (!CPU_IsSupported_AVX())
return False; return False;
if (p.maxFunc < 7) if (z7_x86_cpuid_GetMaxFunc() < 7)
return False; return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1 return 1
& (d[1] >> 5); // avx2 & (d[1] >> 5); // avx2
} }
} }
BoolInt CPU_IsSupported_VAES_AVX2() BoolInt CPU_IsSupported_VAES_AVX2(void)
{ {
Cx86cpuid p; if (!CPU_IsSupported_AVX())
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False; return False;
#endif if (z7_x86_cpuid_GetMaxFunc() < 7)
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False; return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1 return 1
& (d[1] >> 5) // avx2 & (d[1] >> 5) // avx2
@@ -328,20 +679,15 @@ BoolInt CPU_IsSupported_VAES_AVX2()
} }
} }
BoolInt CPU_IsSupported_PageGB() BoolInt CPU_IsSupported_PageGB(void)
{ {
Cx86cpuid cpuid; CHECK_CPUID_IS_SUPPORTED
if (!x86cpuid_CheckAndRead(&cpuid))
return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 0x80000000);
if (d[0] < 0x80000001) if (d[0] < 0x80000001)
return False; return False;
} z7_x86_cpuid(d, 0x80000001);
{
UInt32 d[4] = { 0 };
MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
return (d[3] >> 26) & 1; return (d[3] >> 26) & 1;
} }
} }
@@ -351,11 +697,11 @@ BoolInt CPU_IsSupported_PageGB()
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else #else
@@ -378,28 +724,27 @@ static void Print_sysctlbyname(const char *name)
} }
} }
*/ */
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
{ {
UInt32 val = 0; UInt32 val = 0;
if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
return 1; return 1;
return 0; return 0;
} }
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
BoolInt CPU_IsSupported_CRC32(void) BoolInt CPU_IsSupported_CRC32(void)
{ {
return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
} }
BoolInt CPU_IsSupported_NEON(void) BoolInt CPU_IsSupported_NEON(void)
{ {
return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
} }
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
@@ -461,15 +806,15 @@ MY_HWCAP_CHECK_FUNC (AES)
#include <sys/sysctl.h> #include <sys/sysctl.h>
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{ {
return sysctlbyname(name, buf, bufSize, NULL, 0); return sysctlbyname(name, buf, bufSize, NULL, 0);
} }
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{ {
size_t bufSize = sizeof(*val); size_t bufSize = sizeof(*val);
int res = My_sysctlbyname_Get(name, val, &bufSize); const int res = z7_sysctlbyname_Get(name, val, &bufSize);
if (res == 0 && bufSize != sizeof(*val)) if (res == 0 && bufSize != sizeof(*val))
return EFAULT; return EFAULT;
return res; return res;

View File

@@ -1,8 +1,8 @@
/* CpuArch.h -- CPU specific code /* CpuArch.h -- CPU specific code
2021-07-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H #ifndef ZIP7_INC_CPU_ARCH_H
#define __CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H
#include "7zTypes.h" #include "7zTypes.h"
@@ -51,7 +51,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(__AARCH64EB__) \ || defined(__AARCH64EB__) \
|| defined(__aarch64__) || defined(__aarch64__)
#define MY_CPU_ARM64 #define MY_CPU_ARM64
#define MY_CPU_NAME "arm64" #ifdef __ILP32__
#define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "arm64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT #define MY_CPU_64BIT
#endif #endif
@@ -68,8 +74,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define MY_CPU_ARM #define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_ARMT
#define MY_CPU_NAME "armt" #define MY_CPU_NAME "armt"
#else #else
#define MY_CPU_ARM32
#define MY_CPU_NAME "arm" #define MY_CPU_NAME "arm"
#endif #endif
/* #define MY_CPU_32BIT */ /* #define MY_CPU_32BIT */
@@ -103,6 +111,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(__PPC__) \ || defined(__PPC__) \
|| defined(_POWER) || defined(_POWER)
#define MY_CPU_PPC_OR_PPC64
#if defined(__ppc64__) \ #if defined(__ppc64__) \
|| defined(__powerpc64__) \ || defined(__powerpc64__) \
|| defined(_LP64) \ || defined(_LP64) \
@@ -123,12 +133,15 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#if defined(__sparc64__) #if defined(__riscv) \
#define MY_CPU_NAME "sparc64" || defined(__riscv__)
#define MY_CPU_64BIT #if __riscv_xlen == 32
#elif defined(__sparc__) #define MY_CPU_NAME "riscv32"
#define MY_CPU_NAME "sparc" #elif __riscv_xlen == 64
/* #define MY_CPU_32BIT */ #define MY_CPU_NAME "riscv64"
#else
#define MY_CPU_NAME "riscv"
#endif
#endif #endif
@@ -194,6 +207,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#error Stop_Compiling_Bad_Endian #error Stop_Compiling_Bad_Endian
#endif #endif
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT #error Stop_Compiling_Bad_32_64_BIT
@@ -250,6 +266,67 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef __has_builtin
#define Z7_has_builtin(x) __has_builtin(x)
#else
#define Z7_has_builtin(x) 0
#endif
#define Z7_BSWAP32_CONST(v) \
( (((UInt32)(v) << 24) ) \
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
| (((UInt32)(v) >> 24) ))
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#include <stdlib.h>
/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
#define Z7_BSWAP16(v) _byteswap_ushort(v)
#define Z7_BSWAP32(v) _byteswap_ulong (v)
#define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
#define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v)
#define Z7_BSWAP64(v) __builtin_bswap64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#else
#define Z7_BSWAP16(v) ((UInt16) \
( ((UInt32)(v) << 8) \
| ((UInt32)(v) >> 8) \
))
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
#define Z7_BSWAP64(v) \
( ( ( (UInt64)(v) ) << 8 * 7 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
)
#endif
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \ #if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) || defined(MY_CPU_ARM64)
@@ -269,13 +346,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64 #ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) #define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif #endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else #else
@@ -302,51 +377,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#ifndef MY_CPU_LE_UNALIGN_64 #ifndef GetUi64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#endif
#ifndef SetUi64
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \ SetUi32(_ppp2_ , (UInt32)_vvv2_) \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
#endif #endif
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
#ifdef __has_builtin #if defined(MY_CPU_LE_UNALIGN_64)
#define MY__has_builtin(x) __has_builtin(x) #define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
#else
#define MY__has_builtin(x) 0
#endif #endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
#include <stdlib.h>
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
#elif defined(MY_CPU_LE_UNALIGN) && ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
#else #else
#define GetBe32(p) ( \ #define GetBe32(p) ( \
@@ -355,8 +405,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
((UInt32)((const Byte *)(p))[2] << 8) | \ ((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] ) ((const Byte *)(p))[3] )
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \ _ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \ _ppp_[1] = (Byte)(_vvv_ >> 16); \
@@ -365,50 +413,83 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#ifndef GetBe64
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#endif
#ifndef GetBe16 #ifndef GetBe16
#define GetBe16(p) ( (UInt16) ( \ #define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \ ((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] )) ((const Byte *)(p))[1] ))
#endif #endif
#if defined(MY_CPU_BE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
#elif defined(MY_CPU_LE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
#else
#error Stop_Compiling_Unknown_Endian_CONV
#endif
#if defined(MY_CPU_BE)
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
#define SetUi16a(p, v) SetUi16(p, v)
#elif defined(MY_CPU_LE)
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v)
#define SetBe16a(p, v) SetBe16(p, v)
#else
#error Stop_Compiling_Unknown_Endian_CPU_a
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_OR_ARM64) \
|| defined(MY_CPU_PPC_OR_PPC64)
#define Z7_CPU_FAST_ROTATE_SUPPORTED
#endif
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
typedef struct void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
{ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
UInt32 maxFunc; #if defined(MY_CPU_AMD64)
UInt32 vendor[3]; #define Z7_IF_X86_CPUID_SUPPORTED
UInt32 ver; #else
UInt32 b; #define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
UInt32 c; #endif
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void);
BoolInt CPU_IsSupported_SSE2(void);
BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void); BoolInt CPU_IsSupported_SHA(void);
@@ -433,8 +514,8 @@ BoolInt CPU_IsSupported_AES(void);
#endif #endif
#if defined(__APPLE__) #if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif #endif
EXTERN_C_END EXTERN_C_END

View File

@@ -1,5 +1,5 @@
/* LzFind.c -- Match finder for LZ algorithms /* LzFind.c -- Match finder for LZ algorithms
2021-11-29 : Igor Pavlov : Public domain */ 2023-03-14 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@@ -17,7 +17,7 @@
#define kEmptyHashValue 0 #define kEmptyHashValue 0
#define kMaxValForNormalize ((UInt32)0) #define kMaxValForNormalize ((UInt32)0)
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug // #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
@@ -67,10 +67,10 @@
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
{ {
if (!p->directInput) // if (!p->directInput)
{ {
ISzAlloc_Free(alloc, p->bufferBase); ISzAlloc_Free(alloc, p->bufBase);
p->bufferBase = NULL; p->bufBase = NULL;
} }
} }
@@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
{ {
if (blockSize == 0) if (blockSize == 0)
return 0; return 0;
if (!p->bufferBase || p->blockSize != blockSize) if (!p->bufBase || p->blockSize != blockSize)
{ {
// size_t blockSizeT; // size_t blockSizeT;
LzInWindow_Free(p, alloc); LzInWindow_Free(p, alloc);
@@ -101,11 +101,11 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
#endif #endif
*/ */
p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
// printf("\nbufferBase = %p\n", p->bufferBase); // printf("\nbufferBase = %p\n", p->bufBase);
// return 0; // for debug // return 0; // for debug
} }
return (p->bufferBase != NULL); return (p->bufBase != NULL);
} }
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
@@ -113,7 +113,7 @@ static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
MY_NO_INLINE Z7_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p) static void MatchFinder_ReadBlock(CMatchFinder *p)
{ {
if (p->streamEndWasReached || p->result != SZ_OK) if (p->streamEndWasReached || p->result != SZ_OK)
@@ -127,8 +127,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
if (curSize > p->directInputRem) if (curSize > p->directInputRem)
curSize = (UInt32)p->directInputRem; curSize = (UInt32)p->directInputRem;
p->directInputRem -= curSize;
p->streamPos += curSize; p->streamPos += curSize;
p->directInputRem -= curSize;
if (p->directInputRem == 0) if (p->directInputRem == 0)
p->streamEndWasReached = 1; p->streamEndWasReached = 1;
return; return;
@@ -136,8 +136,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
for (;;) for (;;)
{ {
Byte *dest = p->buffer + GET_AVAIL_BYTES(p); const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
size_t size = (size_t)(p->bufferBase + p->blockSize - dest); size_t size = (size_t)(p->bufBase + p->blockSize - dest);
if (size == 0) if (size == 0)
{ {
/* we call ReadBlock() after NeedMove() and MoveBlock(). /* we call ReadBlock() after NeedMove() and MoveBlock().
@@ -153,7 +153,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
// #define kRead 3 // #define kRead 3
// if (size > kRead) size = kRead; // for debug // if (size > kRead) size = kRead; // for debug
p->result = ISeqInStream_Read(p->stream, dest, &size); /*
// we need cast (Byte *)dest.
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
*/
p->result = ISeqInStream_Read(p->stream,
p->bufBase + (dest - p->bufBase), &size);
if (p->result != SZ_OK) if (p->result != SZ_OK)
return; return;
if (size == 0) if (size == 0)
@@ -173,14 +180,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
MY_NO_INLINE Z7_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p) void MatchFinder_MoveBlock(CMatchFinder *p)
{ {
const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
p->buffer = p->bufferBase + keepBefore; p->buffer = p->bufBase + keepBefore;
memmove(p->bufferBase, memmove(p->bufBase,
p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
keepBefore + (size_t)GET_AVAIL_BYTES(p)); keepBefore + (size_t)GET_AVAIL_BYTES(p));
} }
@@ -198,7 +205,7 @@ int MatchFinder_NeedMove(CMatchFinder *p)
return 0; return 0;
if (p->streamEndWasReached || p->result != SZ_OK) if (p->streamEndWasReached || p->result != SZ_OK)
return 0; return 0;
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
} }
void MatchFinder_ReadIfRequired(CMatchFinder *p) void MatchFinder_ReadIfRequired(CMatchFinder *p)
@@ -214,6 +221,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
p->cutValue = 32; p->cutValue = 32;
p->btMode = 1; p->btMode = 1;
p->numHashBytes = 4; p->numHashBytes = 4;
p->numHashBytes_Min = 2;
p->numHashOutBits = 0;
p->bigHash = 0; p->bigHash = 0;
} }
@@ -222,8 +231,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
void MatchFinder_Construct(CMatchFinder *p) void MatchFinder_Construct(CMatchFinder *p)
{ {
unsigned i; unsigned i;
p->bufferBase = NULL; p->buffer = NULL;
p->bufBase = NULL;
p->directInput = 0; p->directInput = 0;
p->stream = NULL;
p->hash = NULL; p->hash = NULL;
p->expectedDataSize = (UInt64)(Int64)-1; p->expectedDataSize = (UInt64)(Int64)-1;
MatchFinder_SetDefaultSettings(p); MatchFinder_SetDefaultSettings(p);
@@ -238,6 +249,8 @@ void MatchFinder_Construct(CMatchFinder *p)
} }
} }
#undef kCrcPoly
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
{ {
ISzAlloc_Free(alloc, p->hash); ISzAlloc_Free(alloc, p->hash);
@@ -252,7 +265,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
{ {
size_t sizeInBytes = (size_t)num * sizeof(CLzRef); const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
if (sizeInBytes / sizeof(CLzRef) != num) if (sizeInBytes / sizeof(CLzRef) != num)
return NULL; return NULL;
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
@@ -298,6 +311,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
} }
// input is historySize
static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
{
if (p->numHashBytes == 2)
return (1 << 16) - 1;
if (hs != 0)
hs--;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
// we propagated 16 bits in (hs). Low 16 bits must be set later
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1;
return hs;
}
// input is historySize
static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
{
if (p->numHashBytes == 2)
return (1 << 16) - 1;
if (hs != 0)
hs--;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
// we propagated 16 bits in (hs). Low 16 bits must be set later
hs >>= 1;
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
else
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1;
return hs;
}
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc) ISzAllocPtr alloc)
@@ -318,78 +387,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
p->blockSize = 0; p->blockSize = 0;
if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
{ {
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it size_t hashSizeSum;
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{ {
// UInt32 hs4; UInt32 hs;
p->fixedHashSize = 0; UInt32 hsCur;
hs = (1 << 16) - 1;
if (p->numHashBytes != 2) if (p->numHashOutBits != 0)
{ {
hs = historySize; unsigned numBits = p->numHashOutBits;
if (hs > p->expectedDataSize) const unsigned nbMax =
hs = (UInt32)p->expectedDataSize; (p->numHashBytes == 2 ? 16 :
if (hs != 0) (p->numHashBytes == 3 ? 24 : 32));
hs--; if (numBits > nbMax)
hs |= (hs >> 1); numBits = nbMax;
hs |= (hs >> 2); if (numBits >= 32)
hs |= (hs >> 4); hs = (UInt32)0 - 1;
hs |= (hs >> 8); else
// we propagated 16 bits in (hs). Low 16 bits must be set later hs = ((UInt32)1 << numBits) - 1;
hs >>= 1;
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
else
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
// hs = ((UInt32)1 << 25) - 1; // for test
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */ hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5) if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1; hs |= (256 << kLzHash_CrcShift_2) - 1;
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
if (hs > hs2)
hs = hs2;
}
hsCur = hs;
if (p->expectedDataSize < historySize)
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
if (hsCur > hs2)
hsCur = hs2;
}
}
else
{
hs = MatchFinder_GetHashMask(p, historySize);
hsCur = hs;
if (p->expectedDataSize < historySize)
{
hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
if (hsCur > hs) // is it possible?
hsCur = hs;
}
} }
p->hashMask = hs;
hs++;
/* p->hashMask = hsCur;
hs4 = (1 << 20);
if (hs4 > hs)
hs4 = hs;
// hs4 = (1 << 16); // for test
p->hash4Mask = hs4 - 1;
*/
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; hashSizeSum = hs;
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; hashSizeSum++;
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; if (hashSizeSum < hs)
hs += p->fixedHashSize; return 0;
{
UInt32 fixedHashSize = 0;
if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
hashSizeSum += fixedHashSize;
p->fixedHashSize = fixedHashSize;
}
} }
p->matchMaxLen = matchMaxLen;
{ {
size_t newSize; size_t newSize;
size_t numSons; size_t numSons;
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
p->historySize = historySize; p->historySize = historySize;
p->hashSizeSum = hs;
p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
numSons = newCyclicBufferSize; numSons = newCyclicBufferSize;
if (p->btMode) if (p->btMode)
numSons <<= 1; numSons <<= 1;
newSize = hs + numSons; newSize = hashSizeSum + numSons;
if (numSons < newCyclicBufferSize || newSize < numSons)
return 0;
// aligned size is not required here, but it can be better for some loops // aligned size is not required here, but it can be better for some loops
#define NUM_REFS_ALIGN_MASK 0xF #define NUM_REFS_ALIGN_MASK 0xF
newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
if (p->hash && p->numRefs == newSize) // 22.02: we don't reallocate buffer, if old size is enough
if (p->hash && p->numRefs >= newSize)
return 1; return 1;
MatchFinder_FreeThisClassMemory(p, alloc); MatchFinder_FreeThisClassMemory(p, alloc);
@@ -398,7 +480,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
if (p->hash) if (p->hash)
{ {
p->son = p->hash + p->hashSizeSum; p->son = p->hash + hashSizeSum;
return 1; return 1;
} }
} }
@@ -470,7 +552,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
void MatchFinder_Init_4(CMatchFinder *p) void MatchFinder_Init_4(CMatchFinder *p)
{ {
p->buffer = p->bufferBase; if (!p->directInput)
p->buffer = p->bufBase;
{ {
/* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
the code in CMatchFinderMt expects (pos = 1) */ the code in CMatchFinderMt expects (pos = 1) */
@@ -507,20 +590,20 @@ void MatchFinder_Init(CMatchFinder *p)
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__) && (__clang_major__ >= 8) \ #if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 8) \ || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
#define USE_SATUR_SUB_128
#define USE_AVX2 #define USE_LZFIND_SATUR_SUB_128
#define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) #define USE_LZFIND_SATUR_SUB_256
#define ATTRIB_AVX2 __attribute__((__target__("avx2"))) #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
#define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if (_MSC_VER >= 1600) #if (_MSC_VER >= 1600)
#define USE_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#if (_MSC_VER >= 1900) #endif
#define USE_AVX2 #if (_MSC_VER >= 1900)
#include <immintrin.h> // avx #define USE_LZFIND_SATUR_SUB_256
#endif
#endif #endif
#endif #endif
@@ -529,16 +612,16 @@ void MatchFinder_Init(CMatchFinder *p)
#if defined(__clang__) && (__clang_major__ >= 8) \ #if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8) || defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
// #define ATTRIB_SSE41 __attribute__((__target__(""))) // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
#else #else
// #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif #endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if (_MSC_VER >= 1910) #if (_MSC_VER >= 1910)
#define USE_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#endif #endif
#endif #endif
@@ -550,121 +633,130 @@ void MatchFinder_Init(CMatchFinder *p)
#endif #endif
/*
#ifndef ATTRIB_SSE41
#define ATTRIB_SSE41
#endif
#ifndef ATTRIB_AVX2
#define ATTRIB_AVX2
#endif
*/
#ifdef USE_SATUR_SUB_128 #ifdef USE_LZFIND_SATUR_SUB_128
// #define _SHOW_HW_STATUS // #define Z7_SHOW_HW_STATUS
#ifdef _SHOW_HW_STATUS #ifdef Z7_SHOW_HW_STATUS
#include <stdio.h> #include <stdio.h>
#define _PRF(x) x #define PRF(x) x
_PRF(;) PRF(;)
#else #else
#define _PRF(x) #define PRF(x)
#endif #endif
#ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM_OR_ARM64
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
// #define FORCE_SATUR_SUB_128 // #define FORCE_LZFIND_SATUR_SUB_128
#endif #endif
typedef uint32x4_t LzFind_v128;
#define SASUB_128_V(v, s) \
vsubq_u32(vmaxq_u32(v, s), s)
typedef uint32x4_t v128; #else // MY_CPU_ARM_OR_ARM64
#define SASUB_128(i) \
*(v128 *)(void *)(items + (i) * 4) = \
vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
#else
#include <smmintrin.h> // sse4.1 #include <smmintrin.h> // sse4.1
typedef __m128i v128; typedef __m128i LzFind_v128;
// SSE 4.1
#define SASUB_128_V(v, s) \
_mm_sub_epi32(_mm_max_epu32(v, s), s)
#endif // MY_CPU_ARM_OR_ARM64
#define SASUB_128(i) \ #define SASUB_128(i) \
*(v128 *)(void *)(items + (i) * 4) = \ *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
_mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
#endif
Z7_NO_INLINE
MY_NO_INLINE
static static
#ifdef ATTRIB_SSE41 #ifdef LZFIND_ATTRIB_SSE41
ATTRIB_SSE41 LZFIND_ATTRIB_SSE41
#endif #endif
void void
MY_FAST_CALL Z7_FASTCALL
LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{ {
v128 sub2 = const LzFind_v128 sub2 =
#ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM_OR_ARM64
vdupq_n_u32(subValue); vdupq_n_u32(subValue);
#else #else
_mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
#endif #endif
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do do
{ {
SASUB_128(0) SASUB_128(0) SASUB_128(1) items += 2 * 4;
SASUB_128(1) SASUB_128(0) SASUB_128(1) items += 2 * 4;
SASUB_128(2)
SASUB_128(3)
items += 4 * 4;
} }
while (items != lim); while (items != lim);
} }
#ifdef USE_AVX2 #ifdef USE_LZFIND_SATUR_SUB_256
#include <immintrin.h> // avx #include <immintrin.h> // avx
/*
clang :immintrin.h uses
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX2__)
#include <avx2intrin.h>
#endif
so we need <avxintrin.h> for clang-cl */
#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 #if defined(__clang__)
#include <avxintrin.h>
#include <avx2intrin.h>
#endif
MY_NO_INLINE // AVX2:
#define SASUB_256(i) \
*( __m256i *)( void *)(items + (i) * 8) = \
_mm256_sub_epi32(_mm256_max_epu32( \
*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
Z7_NO_INLINE
static static
#ifdef ATTRIB_AVX2 #ifdef LZFIND_ATTRIB_AVX2
ATTRIB_AVX2 LZFIND_ATTRIB_AVX2
#endif #endif
void void
MY_FAST_CALL Z7_FASTCALL
LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{ {
__m256i sub2 = _mm256_set_epi32( const __m256i sub2 = _mm256_set_epi32(
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do do
{ {
SASUB_256(0) SASUB_256(0) SASUB_256(1) items += 2 * 8;
SASUB_256(1) SASUB_256(0) SASUB_256(1) items += 2 * 8;
items += 2 * 8;
} }
while (items != lim); while (items != lim);
} }
#endif // USE_AVX2 #endif // USE_LZFIND_SATUR_SUB_256
#ifndef FORCE_SATUR_SUB_128 #ifndef FORCE_LZFIND_SATUR_SUB_128
typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
UInt32 subValue, CLzRef *items, const CLzRef *lim); UInt32 subValue, CLzRef *items, const CLzRef *lim);
static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
#endif // FORCE_SATUR_SUB_128 #endif // FORCE_LZFIND_SATUR_SUB_128
#endif // USE_SATUR_SUB_128 #endif // USE_LZFIND_SATUR_SUB_128
// kEmptyHashValue must be zero // kEmptyHashValue must be zero
// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; // #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; #define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
#ifdef FORCE_SATUR_SUB_128 #ifdef FORCE_LZFIND_SATUR_SUB_128
#define DEFAULT_SaturSub LzFind_SaturSub_128 #define DEFAULT_SaturSub LzFind_SaturSub_128
@@ -672,24 +764,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
#define DEFAULT_SaturSub LzFind_SaturSub_32 #define DEFAULT_SaturSub LzFind_SaturSub_32
MY_NO_INLINE Z7_NO_INLINE
static static
void void
MY_FAST_CALL Z7_FASTCALL
LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{ {
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do do
{ {
UInt32 v; SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(0) SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(1) SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(2) SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(3)
SASUB_32(4)
SASUB_32(5)
SASUB_32(6)
SASUB_32(7)
items += 8;
} }
while (items != lim); while (items != lim);
} }
@@ -697,27 +784,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
#endif #endif
MY_NO_INLINE Z7_NO_INLINE
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{ {
#define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
CLzRef *lim; for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
{ {
UInt32 v; SASUB_32(0)
SASUB_32(0);
items++; items++;
} }
{ {
#define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
numItems &= K_NORM_ALIGN_MASK; numItems &= k_Align_Mask;
if (items != lim) if (items != lim)
{ {
#if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
if (g_LzFind_SaturSub) if (g_LzFind_SaturSub)
g_LzFind_SaturSub(subValue, items, lim); g_LzFind_SaturSub(subValue, items, lim);
else else
@@ -726,12 +809,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
} }
items = lim; items = lim;
} }
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0; numItems--) for (; numItems != 0; numItems--)
{ {
UInt32 v; SASUB_32(0)
SASUB_32(0);
items++; items++;
} }
} }
@@ -740,7 +821,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
// call MatchFinder_CheckLimits() only after (p->pos++) update // call MatchFinder_CheckLimits() only after (p->pos++) update
MY_NO_INLINE Z7_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p) static void MatchFinder_CheckLimits(CMatchFinder *p)
{ {
if (// !p->streamEndWasReached && p->result == SZ_OK && if (// !p->streamEndWasReached && p->result == SZ_OK &&
@@ -768,11 +849,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
// const UInt32 subValue = (1 << 15); // for debug // const UInt32 subValue = (1 << 15); // for debug
// printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
size_t numSonRefs = p->cyclicBufferSize; MatchFinder_REDUCE_OFFSETS(p, subValue)
if (p->btMode) MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
numSonRefs <<= 1; {
Inline_MatchFinder_ReduceOffsets(p, subValue); size_t numSonRefs = p->cyclicBufferSize;
MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); if (p->btMode)
numSonRefs <<= 1;
MatchFinder_Normalize3(subValue, p->son, numSonRefs);
}
} }
if (p->cyclicBufferPos == p->cyclicBufferSize) if (p->cyclicBufferPos == p->cyclicBufferSize)
@@ -785,7 +869,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
/* /*
(lenLimit > maxLen) (lenLimit > maxLen)
*/ */
MY_FORCE_INLINE Z7_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *d, unsigned maxLen) UInt32 *d, unsigned maxLen)
@@ -867,7 +951,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
} }
MY_FORCE_INLINE Z7_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *d, UInt32 maxLen) UInt32 *d, UInt32 maxLen)
@@ -1004,7 +1088,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
#define MOVE_POS_RET MOVE_POS return distances; #define MOVE_POS_RET MOVE_POS return distances;
MY_NO_INLINE Z7_NO_INLINE
static void MatchFinder_MovePos(CMatchFinder *p) static void MatchFinder_MovePos(CMatchFinder *p)
{ {
/* we go here at the end of stream data, when (avail < num_hash_bytes) /* we go here at the end of stream data, when (avail < num_hash_bytes)
@@ -1015,11 +1099,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
if (p->btMode) if (p->btMode)
p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
*/ */
MOVE_POS; MOVE_POS
} }
#define GET_MATCHES_HEADER2(minLen, ret_op) \ #define GET_MATCHES_HEADER2(minLen, ret_op) \
unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer; cur = p->buffer;
@@ -1028,11 +1112,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); #define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
distances = func(MF_PARAMS(p), \ distances = func(MF_PARAMS(p), \
distances, (UInt32)_maxLen_); MOVE_POS_RET; distances, (UInt32)_maxLen_); MOVE_POS_RET
#define GET_MATCHES_FOOTER_BT(_maxLen_) \ #define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@@ -1052,7 +1136,7 @@ static void MatchFinder_MovePos(CMatchFinder *p)
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{ {
GET_MATCHES_HEADER(2) GET_MATCHES_HEADER(2)
HASH2_CALC; HASH2_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_BT(1) GET_MATCHES_FOOTER_BT(1)
@@ -1061,7 +1145,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{ {
GET_MATCHES_HEADER(3) GET_MATCHES_HEADER(3)
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_BT(2) GET_MATCHES_FOOTER_BT(2)
@@ -1082,7 +1166,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(3) GET_MATCHES_HEADER(3)
HASH3_CALC; HASH3_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@@ -1107,7 +1191,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
SkipMatchesSpec(MF_PARAMS(p)); SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET; MOVE_POS_RET
} }
} }
@@ -1123,7 +1207,7 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(4) GET_MATCHES_HEADER(4)
HASH4_CALC; HASH4_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@@ -1190,7 +1274,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(5) GET_MATCHES_HEADER(5)
HASH5_CALC; HASH5_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@@ -1246,7 +1330,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
SkipMatchesSpec(MF_PARAMS(p)); SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET; MOVE_POS_RET
} }
break; break;
} }
@@ -1263,7 +1347,7 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(4) GET_MATCHES_HEADER(4)
HASH4_CALC; HASH4_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@@ -1314,12 +1398,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
p->son[p->cyclicBufferPos] = curMatch; p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET; MOVE_POS_RET
} }
break; break;
} }
GET_MATCHES_FOOTER_HC(maxLen); GET_MATCHES_FOOTER_HC(maxLen)
} }
@@ -1330,7 +1414,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(5) GET_MATCHES_HEADER(5)
HASH5_CALC; HASH5_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@@ -1386,19 +1470,19 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
p->son[p->cyclicBufferPos] = curMatch; p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET; MOVE_POS_RET
} }
break; break;
} }
GET_MATCHES_FOOTER_HC(maxLen); GET_MATCHES_FOOTER_HC(maxLen)
} }
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{ {
GET_MATCHES_HEADER(3) GET_MATCHES_HEADER(3)
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_HC(2) GET_MATCHES_FOOTER_HC(2)
@@ -1409,7 +1493,7 @@ static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
SKIP_HEADER(2) SKIP_HEADER(2)
{ {
HASH2_CALC; HASH2_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
} }
@@ -1420,7 +1504,7 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
SKIP_HEADER(3) SKIP_HEADER(3)
{ {
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
} }
@@ -1433,7 +1517,7 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
UInt32 h2; UInt32 h2;
UInt32 *hash; UInt32 *hash;
HASH3_CALC; HASH3_CALC
hash = p->hash; hash = p->hash;
curMatch = (hash + kFix3HashSize)[hv]; curMatch = (hash + kFix3HashSize)[hv];
hash[h2] = hash[h2] =
@@ -1448,7 +1532,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
UInt32 h2, h3; UInt32 h2, h3;
UInt32 *hash; UInt32 *hash;
HASH4_CALC; HASH4_CALC
hash = p->hash; hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv]; curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = hash [h2] =
@@ -1464,7 +1548,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
UInt32 h2, h3; UInt32 h2, h3;
UInt32 *hash; UInt32 *hash;
HASH5_CALC; HASH5_CALC
hash = p->hash; hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv]; curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = hash [h2] =
@@ -1478,7 +1562,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
#define HC_SKIP_HEADER(minLen) \ #define HC_SKIP_HEADER(minLen) \
do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
Byte *cur; \ const Byte *cur; \
UInt32 *hash; \ UInt32 *hash; \
UInt32 *son; \ UInt32 *son; \
UInt32 pos = p->pos; \ UInt32 pos = p->pos; \
@@ -1510,7 +1594,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
HC_SKIP_HEADER(4) HC_SKIP_HEADER(4)
UInt32 h2, h3; UInt32 h2, h3;
HASH4_CALC; HASH4_CALC
curMatch = (hash + kFix4HashSize)[hv]; curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = hash [h2] =
(hash + kFix3HashSize)[h3] = (hash + kFix3HashSize)[h3] =
@@ -1540,7 +1624,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
HC_SKIP_HEADER(3) HC_SKIP_HEADER(3)
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = hash[hv]; curMatch = hash[hv];
hash[hv] = pos; hash[hv] = pos;
@@ -1590,17 +1674,17 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
void LzFindPrepare() void LzFindPrepare(void)
{ {
#ifndef FORCE_SATUR_SUB_128 #ifndef FORCE_LZFIND_SATUR_SUB_128
#ifdef USE_SATUR_SUB_128 #ifdef USE_LZFIND_SATUR_SUB_128
LZFIND_SATUR_SUB_CODE_FUNC f = NULL; LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
#ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM_OR_ARM64
{ {
if (CPU_IsSupported_NEON()) if (CPU_IsSupported_NEON())
{ {
// #pragma message ("=== LzFind NEON") // #pragma message ("=== LzFind NEON")
_PRF(printf("\n=== LzFind NEON\n")); PRF(printf("\n=== LzFind NEON\n"));
f = LzFind_SaturSub_128; f = LzFind_SaturSub_128;
} }
// f = 0; // for debug // f = 0; // for debug
@@ -1609,20 +1693,25 @@ void LzFindPrepare()
if (CPU_IsSupported_SSE41()) if (CPU_IsSupported_SSE41())
{ {
// #pragma message ("=== LzFind SSE41") // #pragma message ("=== LzFind SSE41")
_PRF(printf("\n=== LzFind SSE41\n")); PRF(printf("\n=== LzFind SSE41\n"));
f = LzFind_SaturSub_128; f = LzFind_SaturSub_128;
#ifdef USE_AVX2 #ifdef USE_LZFIND_SATUR_SUB_256
if (CPU_IsSupported_AVX2()) if (CPU_IsSupported_AVX2())
{ {
// #pragma message ("=== LzFind AVX2") // #pragma message ("=== LzFind AVX2")
_PRF(printf("\n=== LzFind AVX2\n")); PRF(printf("\n=== LzFind AVX2\n"));
f = LzFind_SaturSub_256; f = LzFind_SaturSub_256;
} }
#endif #endif
} }
#endif // MY_CPU_ARM_OR_ARM64 #endif // MY_CPU_ARM_OR_ARM64
g_LzFind_SaturSub = f; g_LzFind_SaturSub = f;
#endif // USE_SATUR_SUB_128 #endif // USE_LZFIND_SATUR_SUB_128
#endif // FORCE_SATUR_SUB_128 #endif // FORCE_LZFIND_SATUR_SUB_128
} }
#undef MOVE_POS
#undef MOVE_POS_RET
#undef PRF

View File

@@ -1,8 +1,8 @@
/* LzFind.h -- Match finder for LZ algorithms /* LzFind.h -- Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H #ifndef ZIP7_INC_LZ_FIND_H
#define __LZ_FIND_H #define ZIP7_INC_LZ_FIND_H
#include "7zTypes.h" #include "7zTypes.h"
@@ -10,9 +10,9 @@ EXTERN_C_BEGIN
typedef UInt32 CLzRef; typedef UInt32 CLzRef;
typedef struct _CMatchFinder typedef struct
{ {
Byte *buffer; const Byte *buffer;
UInt32 pos; UInt32 pos;
UInt32 posLimit; UInt32 posLimit;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
@@ -32,8 +32,8 @@ typedef struct _CMatchFinder
UInt32 hashMask; UInt32 hashMask;
UInt32 cutValue; UInt32 cutValue;
Byte *bufferBase; Byte *bufBase;
ISeqInStream *stream; ISeqInStreamPtr stream;
UInt32 blockSize; UInt32 blockSize;
UInt32 keepSizeBefore; UInt32 keepSizeBefore;
@@ -43,7 +43,9 @@ typedef struct _CMatchFinder
size_t directInputRem; size_t directInputRem;
UInt32 historySize; UInt32 historySize;
UInt32 fixedHashSize; UInt32 fixedHashSize;
UInt32 hashSizeSum; Byte numHashBytes_Min;
Byte numHashOutBits;
Byte _pad2_[2];
SRes result; SRes result;
UInt32 crc[256]; UInt32 crc[256];
size_t numRefs; size_t numRefs;
@@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p);
/* Conditions: /* (directInput = 0) is default value.
historySize <= 3 GB It's required to provide correct (directInput) value
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB before calling MatchFinder_Create().
You can set (directInput) by any of the following calls:
- MatchFinder_SET_DIRECT_INPUT_BUF()
- MatchFinder_SET_STREAM()
- MatchFinder_SET_STREAM_MODE()
*/ */
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
(p)->stream = NULL; \
(p)->directInput = 1; \
(p)->buffer = (_src_); \
(p)->directInputRem = (_srcLen_); }
/*
#define MatchFinder_SET_STREAM_MODE(p) { \
(p)->directInput = 0; }
*/
#define MatchFinder_SET_STREAM(p, _stream_) { \
(p)->stream = _stream_; \
(p)->directInput = 0; }
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc); ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/* /*
#define Inline_MatchFinder_InitPos(p, val) \ #define MatchFinder_INIT_POS(p, val) \
(p)->pos = (val); \ (p)->pos = (val); \
(p)->streamPos = (val); (p)->streamPos = (val);
*/ */
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ // void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
(p)->pos -= (subValue); \ (p)->pos -= (subValue); \
(p)->streamPos -= (subValue); (p)->streamPos -= (subValue);
@@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder typedef struct
{ {
Mf_Init_Func Init; Mf_Init_Func Init;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;

View File

@@ -1,5 +1,5 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms /* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2021-12-21 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@@ -69,7 +69,7 @@ extern UInt64 g_NumIters_Bytes;
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
#define __MT_HASH4_CALC { \ #define MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \ h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \ temp ^= ((UInt32)cur[2] << 8); \
@@ -79,14 +79,14 @@ extern UInt64 g_NumIters_Bytes;
*/ */
MY_NO_INLINE Z7_NO_INLINE
static void MtSync_Construct(CMtSync *p) static void MtSync_Construct(CMtSync *p)
{ {
p->affinity = 0; p->affinity = 0;
p->wasCreated = False; p->wasCreated = False;
p->csWasInitialized = False; p->csWasInitialized = False;
p->csWasEntered = False; p->csWasEntered = False;
Thread_Construct(&p->thread); Thread_CONSTRUCT(&p->thread)
Event_Construct(&p->canStart); Event_Construct(&p->canStart);
Event_Construct(&p->wasStopped); Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->freeSemaphore);
@@ -116,7 +116,7 @@ static void MtSync_Construct(CMtSync *p)
(p)->csWasEntered = False; } (p)->csWasEntered = False; }
MY_NO_INLINE Z7_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p) static UInt32 MtSync_GetNextBlock(CMtSync *p)
{ {
UInt32 numBlocks = 0; UInt32 numBlocks = 0;
@@ -140,14 +140,14 @@ static UInt32 MtSync_GetNextBlock(CMtSync *p)
// buffer is UNLOCKED here // buffer is UNLOCKED here
Semaphore_Wait(&p->filledSemaphore); Semaphore_Wait(&p->filledSemaphore);
LOCK_BUFFER(p); LOCK_BUFFER(p)
return numBlocks; return numBlocks;
} }
/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ /* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
MY_NO_INLINE Z7_NO_INLINE
static void MtSync_StopWriting(CMtSync *p) static void MtSync_StopWriting(CMtSync *p)
{ {
if (!Thread_WasCreated(&p->thread) || p->needStart) if (!Thread_WasCreated(&p->thread) || p->needStart)
@@ -185,7 +185,7 @@ static void MtSync_StopWriting(CMtSync *p)
} }
MY_NO_INLINE Z7_NO_INLINE
static void MtSync_Destruct(CMtSync *p) static void MtSync_Destruct(CMtSync *p)
{ {
PRF(printf("\nMtSync_Destruct %p\n", p)); PRF(printf("\nMtSync_Destruct %p\n", p));
@@ -220,11 +220,11 @@ static void MtSync_Destruct(CMtSync *p)
// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } // #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD // we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x) RINOK(x) #define RINOK_THREAD(x) RINOK_WRes(x)
// call it before each new file (when new starting is required): // call it before each new file (when new starting is required):
MY_NO_INLINE Z7_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{ {
WRes wres; WRes wres;
@@ -245,12 +245,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
if (p->wasCreated) if (p->wasCreated)
return SZ_OK; return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs)); RINOK_THREAD(CriticalSection_Init(&p->cs))
p->csWasInitialized = True; p->csWasInitialized = True;
p->csWasEntered = False; p->csWasEntered = False;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
p->needStart = True; p->needStart = True;
p->exit = True; /* p->exit is unused before (canStart) Event. p->exit = True; /* p->exit is unused before (canStart) Event.
@@ -264,13 +264,13 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
else else
wres = Thread_Create(&p->thread, startAddress, obj); wres = Thread_Create(&p->thread, startAddress, obj);
RINOK_THREAD(wres); RINOK_THREAD(wres)
p->wasCreated = True; p->wasCreated = True;
return SZ_OK; return SZ_OK;
} }
MY_NO_INLINE Z7_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{ {
const WRes wres = MtSync_Create_WRes(p, startAddress, obj); const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
@@ -519,7 +519,7 @@ static void HashThreadFunc(CMatchFinderMt *mt)
if (mf->pos > (UInt32)kMtMaxValForNormalize - num) if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
{ {
const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
Inline_MatchFinder_ReduceOffsets(mf, subValue); MatchFinder_REDUCE_OFFSETS(mf, subValue)
MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
} }
@@ -560,7 +560,7 @@ static void HashThreadFunc(CMatchFinderMt *mt)
*/ */
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes); UInt32 *posRes);
@@ -749,7 +749,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
} }
MY_NO_INLINE Z7_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt) static void BtThreadFunc(CMatchFinderMt *mt)
{ {
CMtSync *p = &mt->btSync; CMtSync *p = &mt->btSync;
@@ -864,15 +864,15 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
return SZ_OK; return SZ_OK;
} }
SRes MatchFinderMt_InitMt(CMatchFinderMt *p) SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{ {
RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
return MtSync_Init(&p->btSync, kMtBtNumBlocks); return MtSync_Init(&p->btSync, kMtBtNumBlocks);
} }
@@ -941,7 +941,7 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
} }
MY_NO_INLINE Z7_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{ {
if (p->failure_LZ_BT) if (p->failure_LZ_BT)
@@ -1163,7 +1163,7 @@ UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
*/ */
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{ {
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
UInt32 *hash = p->hash; UInt32 *hash = p->hash;
@@ -1179,9 +1179,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
(hash + kFix3HashSize)[h3] = m; (hash + kFix3HashSize)[h3] = m;
// (hash + kFix4HashSize)[h4] = m; // (hash + kFix4HashSize)[h4] = m;
#define _USE_H2 // #define BT5_USE_H2
// #ifdef BT5_USE_H2
#ifdef _USE_H2
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{ {
d[1] = m - c2 - 1; d[1] = m - c2 - 1;
@@ -1197,8 +1196,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
} }
d[0] = 3; d[0] = 3;
d += 2; d += 2;
#ifdef _USE_H4 #ifdef BT5_USE_H4
if (c4 >= matchMinPos) if (c4 >= matchMinPos)
if ( if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
@@ -1214,7 +1213,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
d[0] = 2; d[0] = 2;
d += 2; d += 2;
} }
#endif // #endif
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{ {
@@ -1228,7 +1227,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
d += 2; d += 2;
} }
#ifdef _USE_H4 #ifdef BT5_USE_H4
if (c4 >= matchMinPos) if (c4 >= matchMinPos)
if ( if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
@@ -1244,7 +1243,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
} }
static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{ {
const UInt32 *bt = p->btBufPos; const UInt32 *bt = p->btBufPos;
const UInt32 len = *bt++; const UInt32 len = *bt++;
@@ -1268,7 +1267,7 @@ static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{ {
const UInt32 *bt = p->btBufPos; const UInt32 *bt = p->btBufPos;
UInt32 len = *bt++; UInt32 len = *bt++;
@@ -1398,3 +1397,10 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
break; break;
} }
} }
#undef RINOK_THREAD
#undef PRF
#undef MF
#undef GetUi24hi_from32
#undef LOCK_BUFFER
#undef UNLOCK_BUFFER

View File

@@ -1,15 +1,15 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms /* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2021-07-12 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H #ifndef ZIP7_INC_LZ_FIND_MT_H
#define __LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H
#include "LzFind.h" #include "LzFind.h"
#include "Threads.h" #include "Threads.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
typedef struct _CMtSync typedef struct
{ {
UInt32 numProcessedBlocks; UInt32 numProcessedBlocks;
CThread thread; CThread thread;
@@ -39,7 +39,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
typedef struct _CMatchFinderMt typedef struct
{ {
/* LZ */ /* LZ */
const Byte *pointerToCurPos; const Byte *pointerToCurPos;

View File

@@ -1,5 +1,5 @@
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@@ -41,8 +41,8 @@ UInt64 g_NumIters_Bytes;
// #define CYC_TO_POS_OFFSET 1 // for debug // #define CYC_TO_POS_OFFSET 1 // for debug
/* /*
MY_NO_INLINE Z7_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
{ {
do do
@@ -214,13 +214,13 @@ else
to eliminate "movsx" BUG in old MSVC x64 compiler. to eliminate "movsx" BUG in old MSVC x64 compiler.
*/ */
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes); UInt32 *posRes);
MY_NO_INLINE Z7_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes) UInt32 *posRes)
@@ -404,7 +404,7 @@ else
/* /*
typedef UInt32 uint32plus; // size_t typedef UInt32 uint32plus; // size_t
UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes) UInt32 *posRes)

View File

@@ -1,8 +1,8 @@
/* LzHash.h -- HASH functions for LZ algorithms /* LzHash.h -- HASH constants for LZ algorithms
2019-10-30 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H #ifndef ZIP7_INC_LZ_HASH_H
#define __LZ_HASH_H #define ZIP7_INC_LZ_HASH_H
/* /*
(kHash2Size >= (1 << 8)) : Required (kHash2Size >= (1 << 8)) : Required

View File

@@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder /* LzmaDec.c -- LZMA Decoder
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@@ -8,15 +8,15 @@
/* #include "CpuArch.h" */ /* #include "CpuArch.h" */
#include "LzmaDec.h" #include "LzmaDec.h"
#define kNumTopBits 24 // #define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits) #define kTopValue ((UInt32)1 << 24)
#define kNumBitModelTotalBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
#define RC_INIT_SIZE 5 #define RC_INIT_SIZE 5
#ifndef _LZMA_DEC_OPT #ifndef Z7_LZMA_DEC_OPT
#define kNumMoveBits 5 #define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
@@ -25,14 +25,14 @@
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
{ UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_0(p) i = (i + i); A0; } else \
{ UPDATE_1(p); i = (i + i) + 1; A1; } { UPDATE_1(p) i = (i + i) + 1; A1; }
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
{ UPDATE_0(p + i); A0; } else \ { UPDATE_0(p + i) A0; } else \
{ UPDATE_1(p + i); A1; } { UPDATE_1(p + i) A1; }
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
@@ -40,19 +40,19 @@
#define TREE_DECODE(probs, limit, i) \ #define TREE_DECODE(probs, limit, i) \
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
/* #define _LZMA_SIZE_OPT */ /* #define Z7_LZMA_SIZE_OPT */
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else #else
#define TREE_6_DECODE(probs, i) \ #define TREE_6_DECODE(probs, i) \
{ i = 1; \ { i = 1; \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
i -= 0x40; } i -= 0x40; }
#endif #endif
@@ -64,25 +64,25 @@
probLit = prob + (offs + bit + symbol); \ probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;) GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#endif // _LZMA_DEC_OPT #endif // Z7_LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound; #define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound; #define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
{ UPDATE_0_CHECK; i = (i + i); A0; } else \ { UPDATE_0_CHECK i = (i + i); A0; } else \
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; } { UPDATE_1_CHECK i = (i + i) + 1; A1; }
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
#define TREE_DECODE_CHECK(probs, limit, i) \ #define TREE_DECODE_CHECK(probs, limit, i) \
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
{ UPDATE_0_CHECK; i += m; m += m; } else \ { UPDATE_0_CHECK i += m; m += m; } else \
{ UPDATE_1_CHECK; m += m; i += m; } { UPDATE_1_CHECK m += m; i += m; }
#define kNumPosBitsMax 4 #define kNumPosBitsMax 4
@@ -224,14 +224,14 @@ Out:
*/ */
#ifdef _LZMA_DEC_OPT #ifdef Z7_LZMA_DEC_OPT
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
#else #else
static static
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{ {
CLzmaProb *probs = GET_PROBS; CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state; unsigned state = (unsigned)p->state;
@@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
unsigned symbol; unsigned symbol;
UPDATE_0(prob); UPDATE_0(prob)
prob = probs + Literal; prob = probs + Literal;
if (processedPos != 0 || checkDicSize != 0) if (processedPos != 0 || checkDicSize != 0)
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
@@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
{ {
state -= (state < 4) ? state : 3; state -= (state < 4) ? state : 3;
symbol = 1; symbol = 1;
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
do { NORMAL_LITER_DEC } while (symbol < 0x100); do { NORMAL_LITER_DEC } while (symbol < 0x100);
#else #else
NORMAL_LITER_DEC NORMAL_LITER_DEC
@@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
unsigned offs = 0x100; unsigned offs = 0x100;
state -= (state < 10) ? 3 : 6; state -= (state < 10) ? 3 : 6;
symbol = 1; symbol = 1;
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
do do
{ {
unsigned bit; unsigned bit;
@@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
} }
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRep + state; prob = probs + IsRep + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
state += kNumStates; state += kNumStates;
prob = probs + LenCoder; prob = probs + LenCoder;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG0 + state; prob = probs + IsRepG0 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
prob = probs + IsRep0Long + COMBINED_PS_STATE; prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
// that case was checked before with kBadRepCode // that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
@@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = state < kNumLitStates ? 9 : 11; state = state < kNumLitStates ? 9 : 11;
continue; continue;
} }
UPDATE_1(prob); UPDATE_1(prob)
} }
else else
{ {
UInt32 distance; UInt32 distance;
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG1 + state; prob = probs + IsRepG1 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
distance = rep1; distance = rep1;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG2 + state; prob = probs + IsRepG2 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
distance = rep2; distance = rep2;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
distance = rep3; distance = rep3;
rep3 = rep2; rep3 = rep2;
} }
@@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
prob = probs + RepLenCoder; prob = probs + RepLenCoder;
} }
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
{ {
unsigned lim, offset; unsigned lim, offset;
CLzmaProb *probLen = prob + LenChoice; CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
offset = 0; offset = 0;
lim = (1 << kLenNumLowBits); lim = (1 << kLenNumLowBits);
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols; offset = kLenNumLowSymbols;
lim = (1 << kLenNumLowBits); lim = (1 << kLenNumLowBits);
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenHigh; probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2; offset = kLenNumLowSymbols * 2;
lim = (1 << kLenNumHighBits); lim = (1 << kLenNumHighBits);
} }
} }
TREE_DECODE(probLen, lim, len); TREE_DECODE(probLen, lim, len)
len += offset; len += offset;
} }
#else #else
@@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
CLzmaProb *probLen = prob + LenChoice; CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
len = 1; len = 1;
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
len -= 8; len -= 8;
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
len = 1; len = 1;
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenHigh; probLen = prob + LenHigh;
TREE_DECODE(probLen, (1 << kLenNumHighBits), len); TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
len += kLenNumLowSymbols * 2; len += kLenNumLowSymbols * 2;
} }
} }
@@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
UInt32 distance; UInt32 distance;
prob = probs + PosSlot + prob = probs + PosSlot +
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
TREE_6_DECODE(prob, distance); TREE_6_DECODE(prob, distance)
if (distance >= kStartPosModelIndex) if (distance >= kStartPosModelIndex)
{ {
unsigned posSlot = (unsigned)distance; unsigned posSlot = (unsigned)distance;
@@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance++; distance++;
do do
{ {
REV_BIT_VAR(prob, distance, m); REV_BIT_VAR(prob, distance, m)
} }
while (--numDirectBits); while (--numDirectBits);
distance -= m; distance -= m;
@@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance <<= kNumAlignBits; distance <<= kNumAlignBits;
{ {
unsigned i = 1; unsigned i = 1;
REV_BIT_CONST(prob, i, 1); REV_BIT_CONST(prob, i, 1)
REV_BIT_CONST(prob, i, 2); REV_BIT_CONST(prob, i, 2)
REV_BIT_CONST(prob, i, 4); REV_BIT_CONST(prob, i, 4)
REV_BIT_LAST (prob, i, 8); REV_BIT_LAST (prob, i, 8)
distance |= i; distance |= i;
} }
if (distance == (UInt32)0xFFFFFFFF) if (distance == (UInt32)0xFFFFFFFF)
@@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
} }
while (dicPos < limit && buf < bufLimit); while (dicPos < limit && buf < bufLimit);
NORMALIZE; NORMALIZE
p->buf = buf; p->buf = buf;
p->range = range; p->range = range;
@@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{ {
unsigned len = (unsigned)p->remainLen; unsigned len = (unsigned)p->remainLen;
if (len == 0 /* || len >= kMatchSpecLenStart */) if (len == 0 /* || len >= kMatchSpecLenStart */)
@@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
(p->checkDicSize == p->prop.dicSize) (p->checkDicSize == p->prop.dicSize)
*/ */
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{ {
if (p->checkDicSize == 0) if (p->checkDicSize == 0)
{ {
@@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
else else
{ {
unsigned len; unsigned len;
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRep + state; prob = probs + IsRep + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
state = 0; state = 0;
prob = probs + LenCoder; prob = probs + LenCoder;
res = DUMMY_MATCH; res = DUMMY_MATCH;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
res = DUMMY_REP; res = DUMMY_REP;
prob = probs + IsRepG0 + state; prob = probs + IsRepG0 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
prob = probs + IsRep0Long + COMBINED_PS_STATE; prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
break; break;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
} }
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRepG1 + state; prob = probs + IsRepG1 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRepG2 + state; prob = probs + IsRepG2 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
} }
} }
} }
@@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
const CLzmaProb *probLen = prob + LenChoice; const CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0_CHECK(probLen) IF_BIT_0_CHECK(probLen)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
offset = 0; offset = 0;
limit = 1 << kLenNumLowBits; limit = 1 << kLenNumLowBits;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0_CHECK(probLen) IF_BIT_0_CHECK(probLen)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols; offset = kLenNumLowSymbols;
limit = 1 << kLenNumLowBits; limit = 1 << kLenNumLowBits;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
probLen = prob + LenHigh; probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2; offset = kLenNumLowSymbols * 2;
limit = 1 << kLenNumHighBits; limit = 1 << kLenNumHighBits;
} }
} }
TREE_DECODE_CHECK(probLen, limit, len); TREE_DECODE_CHECK(probLen, limit, len)
len += offset; len += offset;
} }
@@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
prob = probs + PosSlot + prob = probs + PosSlot +
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
kNumPosSlotBits); kNumPosSlotBits);
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
if (posSlot >= kStartPosModelIndex) if (posSlot >= kStartPosModelIndex)
{ {
unsigned numDirectBits = ((posSlot >> 1) - 1); unsigned numDirectBits = ((posSlot >> 1) - 1);
@@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
unsigned m = 1; unsigned m = 1;
do do
{ {
REV_BIT_CHECK(prob, i, m); REV_BIT_CHECK(prob, i, m)
} }
while (--numDirectBits); while (--numDirectBits);
} }
@@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
} }
break; break;
} }
NORMALIZE_CHECK; NORMALIZE_CHECK
*bufOut = buf; *bufOut = buf;
return res; return res;
@@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
*/ */
#define RETURN__NOT_FINISHED__FOR_FINISH \ #define RETURN_NOT_FINISHED_FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \ *status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode // return SZ_OK; // for relaxed mode
@@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
} }
if (p->remainLen != 0) if (p->remainLen != 0)
{ {
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
checkEndMarkNow = 1; checkEndMarkNow = 1;
} }
@@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
for (i = 0; i < (unsigned)dummyProcessed; i++) for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i]; p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data; // p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
bufLimit = src; bufLimit = src;
@@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed; p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data; // p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
} }
@@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
{ {
CLzmaProps propNew; CLzmaProps propNew;
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
p->prop = propNew; p->prop = propNew;
return SZ_OK; return SZ_OK;
} }
@@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
{ {
CLzmaProps propNew; CLzmaProps propNew;
SizeT dicBufSize; SizeT dicBufSize;
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
{ {
UInt32 dictSize = propNew.dicSize; UInt32 dictSize = propNew.dicSize;
SizeT mask = ((UInt32)1 << 12) - 1; SizeT mask = ((UInt32)1 << 12) - 1;
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
dicBufSize = ((SizeT)dictSize + mask) & ~mask; dicBufSize = ((SizeT)dictSize + mask) & ~mask;
if (dicBufSize < dictSize) if (dicBufSize < dictSize)
dicBufSize = dictSize; dicBufSize = dictSize;
@@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
*status = LZMA_STATUS_NOT_SPECIFIED; *status = LZMA_STATUS_NOT_SPECIFIED;
if (inSize < RC_INIT_SIZE) if (inSize < RC_INIT_SIZE)
return SZ_ERROR_INPUT_EOF; return SZ_ERROR_INPUT_EOF;
LzmaDec_Construct(&p); LzmaDec_CONSTRUCT(&p)
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
p.dic = dest; p.dic = dest;
p.dicBufSize = outSize; p.dicBufSize = outSize;
LzmaDec_Init(&p); LzmaDec_Init(&p);

View File

@@ -1,19 +1,19 @@
/* LzmaDec.h -- LZMA Decoder /* LzmaDec.h -- LZMA Decoder
2020-03-19 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H #ifndef ZIP7_INC_LZMA_DEC_H
#define __LZMA_DEC_H #define ZIP7_INC_LZMA_DEC_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */ /* #define Z7_LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs, /* Z7_LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */ but memory usage for CLzmaDec::probs will be doubled in that case */
typedef typedef
#ifdef _LZMA_PROB32 #ifdef Z7_LZMA_PROB32
UInt32 UInt32
#else #else
UInt16 UInt16
@@ -25,7 +25,7 @@ typedef
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
typedef struct _CLzmaProps typedef struct
{ {
Byte lc; Byte lc;
Byte lp; Byte lp;
@@ -73,7 +73,8 @@ typedef struct
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec; } CLzmaDec;
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } #define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
void LzmaDec_Init(CLzmaDec *p); void LzmaDec_Init(CLzmaDec *p);

View File

@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder /* LzmaEnc.c -- LZMA Encoder
2021-11-18: Igor Pavlov : Public domain */ 2023-04-13: Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@@ -16,22 +16,22 @@
#include "LzmaEnc.h" #include "LzmaEnc.h"
#include "LzFind.h" #include "LzFind.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "LzFindMt.h" #include "LzFindMt.h"
#endif #endif
/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ /* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig); ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
void LzmaEnc_Finish(CLzmaEncHandle pp); void LzmaEnc_Finish(CLzmaEncHandle p);
void LzmaEnc_SaveState(CLzmaEncHandle pp); void LzmaEnc_SaveState(CLzmaEncHandle p);
void LzmaEnc_RestoreState(CLzmaEncHandle pp); void LzmaEnc_RestoreState(CLzmaEncHandle p);
#ifdef SHOW_STAT #ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0; static unsigned g_STAT_OFFSET = 0;
@@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0;
/* for good normalization speed we still reserve 256 MB before 4 GB range */ /* for good normalization speed we still reserve 256 MB before 4 GB range */
#define kLzmaMaxHistorySize ((UInt32)15 << 28) #define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24 // #define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits) #define kTopValue ((UInt32)1 << 24)
#define kNumBitModelTotalBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
@@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->dictSize = p->mc = 0; p->dictSize = p->mc = 0;
p->reduceSize = (UInt64)(Int64)-1; p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->numHashOutBits = 0;
p->writeEndMark = 0; p->writeEndMark = 0;
p->affinity = 0; p->affinity = 0;
} }
@@ -99,7 +100,7 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->numThreads < 0) if (p->numThreads < 0)
p->numThreads = p->numThreads =
#ifndef _7ZIP_ST #ifndef Z7_ST
((p->btMode && p->algo) ? 2 : 1); ((p->btMode && p->algo) ? 2 : 1);
#else #else
1; 1;
@@ -293,7 +294,7 @@ typedef struct
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
typedef typedef
#ifdef _LZMA_PROB32 #ifdef Z7_LZMA_PROB32
UInt32 UInt32
#else #else
UInt16 UInt16
@@ -350,7 +351,7 @@ typedef struct
Byte *buf; Byte *buf;
Byte *bufLim; Byte *bufLim;
Byte *bufBase; Byte *bufBase;
ISeqOutStream *outStream; ISeqOutStreamPtr outStream;
UInt64 processed; UInt64 processed;
SRes res; SRes res;
} CRangeEnc; } CRangeEnc;
@@ -383,7 +384,7 @@ typedef struct
typedef UInt32 CProbPrice; typedef UInt32 CProbPrice;
typedef struct struct CLzmaEnc
{ {
void *matchFinderObj; void *matchFinderObj;
IMatchFinder2 matchFinder; IMatchFinder2 matchFinder;
@@ -426,7 +427,7 @@ typedef struct
UInt32 dictSize; UInt32 dictSize;
SRes result; SRes result;
#ifndef _7ZIP_ST #ifndef Z7_ST
BoolInt mtMode; BoolInt mtMode;
// begin of CMatchFinderMt is used in LZ thread // begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt; CMatchFinderMt matchFinderMt;
@@ -439,7 +440,7 @@ typedef struct
// we suppose that we have 8-bytes alignment after CMatchFinder // we suppose that we have 8-bytes alignment after CMatchFinder
#ifndef _7ZIP_ST #ifndef Z7_ST
Byte pad[128]; Byte pad[128];
#endif #endif
@@ -479,77 +480,59 @@ typedef struct
CSaveState saveState; CSaveState saveState;
// BoolInt mf_Failure; // BoolInt mf_Failure;
#ifndef _7ZIP_ST #ifndef Z7_ST
Byte pad2[128]; Byte pad2[128];
#endif #endif
} CLzmaEnc; };
#define MFB (p->matchFinderBase) #define MFB (p->matchFinderBase)
/* /*
#ifndef _7ZIP_ST #ifndef Z7_ST
#define MFB (p->matchFinderMt.MatchFinder) #define MFB (p->matchFinderMt.MatchFinder)
#endif #endif
*/ */
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); // #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p;
// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
void LzmaEnc_SaveState(CLzmaEncHandle pp) #define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
#define COPY_LZMA_ENC_STATE(d, s, p) \
(d)->state = (s)->state; \
COPY_ARR(d, s, reps) \
COPY_ARR(d, s, posAlignEncoder) \
COPY_ARR(d, s, isRep) \
COPY_ARR(d, s, isRepG0) \
COPY_ARR(d, s, isRepG1) \
COPY_ARR(d, s, isRepG2) \
COPY_ARR(d, s, isMatch) \
COPY_ARR(d, s, isRep0Long) \
COPY_ARR(d, s, posSlotEncoder) \
COPY_ARR(d, s, posEncoders) \
(d)->lenProbs = (s)->lenProbs; \
(d)->repLenProbs = (s)->repLenProbs; \
memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb));
void LzmaEnc_SaveState(CLzmaEncHandle p)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
CSaveState *dest = &p->saveState; CSaveState *v = &p->saveState;
COPY_LZMA_ENC_STATE(v, p, p)
dest->state = p->state; }
dest->lenProbs = p->lenProbs;
dest->repLenProbs = p->repLenProbs;
COPY_ARR(dest, p, reps); void LzmaEnc_RestoreState(CLzmaEncHandle p)
{
COPY_ARR(dest, p, posAlignEncoder); // GET_CLzmaEnc_p
COPY_ARR(dest, p, isRep); const CSaveState *v = &p->saveState;
COPY_ARR(dest, p, isRepG0); COPY_LZMA_ENC_STATE(p, v, p)
COPY_ARR(dest, p, isRepG1);
COPY_ARR(dest, p, isRepG2);
COPY_ARR(dest, p, isMatch);
COPY_ARR(dest, p, isRep0Long);
COPY_ARR(dest, p, posSlotEncoder);
COPY_ARR(dest, p, posEncoders);
memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));
} }
void LzmaEnc_RestoreState(CLzmaEncHandle pp) Z7_NO_INLINE
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
{ {
CLzmaEnc *dest = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
const CSaveState *p = &dest->saveState;
dest->state = p->state;
dest->lenProbs = p->lenProbs;
dest->repLenProbs = p->repLenProbs;
COPY_ARR(dest, p, reps);
COPY_ARR(dest, p, posAlignEncoder);
COPY_ARR(dest, p, isRep);
COPY_ARR(dest, p, isRepG0);
COPY_ARR(dest, p, isRepG1);
COPY_ARR(dest, p, isRepG2);
COPY_ARR(dest, p, isMatch);
COPY_ARR(dest, p, isRep0Long);
COPY_ARR(dest, p, posSlotEncoder);
COPY_ARR(dest, p, posEncoders);
memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));
}
SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
CLzmaEncProps props = *props2; CLzmaEncProps props = *props2;
LzmaEncProps_Normalize(&props); LzmaEncProps_Normalize(&props);
@@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
p->fastMode = (props.algo == 0); p->fastMode = (props.algo == 0);
// p->_maxMode = True; // p->_maxMode = True;
MFB.btMode = (Byte)(props.btMode ? 1 : 0); MFB.btMode = (Byte)(props.btMode ? 1 : 0);
// MFB.btMode = (Byte)(props.btMode);
{ {
unsigned numHashBytes = 4; unsigned numHashBytes = 4;
if (props.btMode) if (props.btMode)
@@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.numHashBytes >= 5) numHashBytes = 5; if (props.numHashBytes >= 5) numHashBytes = 5;
MFB.numHashBytes = numHashBytes; MFB.numHashBytes = numHashBytes;
// MFB.numHashBytes_Min = 2;
MFB.numHashOutBits = (Byte)props.numHashOutBits;
} }
MFB.cutValue = props.mc; MFB.cutValue = props.mc;
p->writeEndMark = (BoolInt)props.writeEndMark; p->writeEndMark = (BoolInt)props.writeEndMark;
#ifndef _7ZIP_ST #ifndef Z7_ST
/* /*
if (newMultiThread != _multiThread) if (newMultiThread != _multiThread)
{ {
@@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
} }
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
MFB.expectedDataSize = expectedDataSiize; MFB.expectedDataSize = expectedDataSiize;
} }
@@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p)
p->res = SZ_OK; p->res = SZ_OK;
} }
MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
{ {
const size_t num = (size_t)(p->buf - p->bufBase); const size_t num = (size_t)(p->buf - p->bufBase);
if (p->res == SZ_OK) if (p->res == SZ_OK)
@@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
p->buf = p->bufBase; p->buf = p->bufBase;
} }
MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
{ {
UInt32 low = (UInt32)p->low; UInt32 low = (UInt32)p->low;
unsigned high = (unsigned)(p->low >> 32); unsigned high = (unsigned)(p->low >> 32);
@@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p)
ttt = *(prob); \ ttt = *(prob); \
newBound = (range >> kNumBitModelTotalBits) * ttt; newBound = (range >> kNumBitModelTotalBits) * ttt;
// #define _LZMA_ENC_USE_BRANCH // #define Z7_LZMA_ENC_USE_BRANCH
#ifdef _LZMA_ENC_USE_BRANCH #ifdef Z7_LZMA_ENC_USE_BRANCH
#define RC_BIT(p, prob, bit) { \ #define RC_BIT(p, prob, bit) { \
RC_BIT_PRE(p, prob) \ RC_BIT_PRE(p, prob) \
@@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
CLzmaProb *prob = probs + (sym >> 8); CLzmaProb *prob = probs + (sym >> 8);
UInt32 bit = (sym >> 7) & 1; UInt32 bit = (sym >> 7) & 1;
sym <<= 1; sym <<= 1;
RC_BIT(p, prob, bit); RC_BIT(p, prob, bit)
} }
while (sym < 0x10000); while (sym < 0x10000);
p->range = range; p->range = range;
@@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn
bit = (sym >> 7) & 1; bit = (sym >> 7) & 1;
sym <<= 1; sym <<= 1;
offs &= ~(matchByte ^ sym); offs &= ~(matchByte ^ sym);
RC_BIT(p, prob, bit); RC_BIT(p, prob, bit)
} }
while (sym < 0x10000); while (sym < 0x10000);
p->range = range; p->range = range;
@@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
#define GET_PRICE(prob, bit) \ #define GET_PRICE(prob, bit) \
p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
#define GET_PRICEa(prob, bit) \ #define GET_PRICEa(prob, bit) \
ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
@@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi
unsigned bit = sym & 1; unsigned bit = sym & 1;
// RangeEnc_EncodeBit(rc, probs + m, bit); // RangeEnc_EncodeBit(rc, probs + m, bit);
sym >>= 1; sym >>= 1;
RC_BIT(rc, probs + m, bit); RC_BIT(rc, probs + m, bit)
m = (m << 1) | bit; m = (m << 1) | bit;
} }
while (--numBits); while (--numBits);
@@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
UInt32 range, ttt, newBound; UInt32 range, ttt, newBound;
CLzmaProb *probs = p->low; CLzmaProb *probs = p->low;
range = rc->range; range = rc->range;
RC_BIT_PRE(rc, probs); RC_BIT_PRE(rc, probs)
if (sym >= kLenNumLowSymbols) if (sym >= kLenNumLowSymbols)
{ {
RC_BIT_1(rc, probs); RC_BIT_1(rc, probs)
probs += kLenNumLowSymbols; probs += kLenNumLowSymbols;
RC_BIT_PRE(rc, probs); RC_BIT_PRE(rc, probs)
if (sym >= kLenNumLowSymbols * 2) if (sym >= kLenNumLowSymbols * 2)
{ {
RC_BIT_1(rc, probs); RC_BIT_1(rc, probs)
rc->range = range; rc->range = range;
// RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
@@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
{ {
unsigned m; unsigned m;
unsigned bit; unsigned bit;
RC_BIT_0(rc, probs); RC_BIT_0(rc, probs)
probs += (posState << (1 + kLenNumLowBits)); probs += (posState << (1 + kLenNumLowBits));
bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit;
bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit;
bit = sym & 1; RC_BIT(rc, probs + m, bit); bit = sym & 1; RC_BIT(rc, probs + m, bit)
rc->range = range; rc->range = range;
} }
} }
@@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price
} }
MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
CLenPriceEnc *p, CLenPriceEnc *p,
unsigned numPosStates, unsigned numPosStates,
const CLenEnc *enc, const CLenEnc *enc,
@@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
+ GET_PRICE_1(p->isRep[state]) \ + GET_PRICE_1(p->isRep[state]) \
+ GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRepG0[state])
MY_FORCE_INLINE Z7_FORCE_INLINE
static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
{ {
UInt32 price; UInt32 price;
@@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
LitEnc_GetPrice(probs, curByte, p->ProbPrices)); LitEnc_GetPrice(probs, curByte, p->ProbPrices));
} }
MakeAs_Lit(&p->opt[1]); MakeAs_Lit(&p->opt[1])
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
@@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (shortRepPrice < p->opt[1].price) if (shortRepPrice < p->opt[1].price)
{ {
p->opt[1].price = shortRepPrice; p->opt[1].price = shortRepPrice;
MakeAs_ShortRep(&p->opt[1]); MakeAs_ShortRep(&p->opt[1])
} }
if (last < 2) if (last < 2)
{ {
@@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
else else
{ {
unsigned slot; unsigned slot;
GetPosSlot2(dist, slot); GetPosSlot2(dist, slot)
price += p->alignPrices[dist & kAlignMask]; price += p->alignPrices[dist & kAlignMask];
price += p->posSlotPrices[lenToPosState][slot]; price += p->posSlotPrices[lenToPosState][slot];
} }
@@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
unsigned delta = best - cur; unsigned delta = best - cur;
if (delta != 0) if (delta != 0)
{ {
MOVE_POS(p, delta); MOVE_POS(p, delta)
} }
} }
cur = best; cur = best;
@@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
{ {
nextOpt->price = litPrice; nextOpt->price = litPrice;
nextOpt->len = 1; nextOpt->len = 1;
MakeAs_Lit(nextOpt); MakeAs_Lit(nextOpt)
nextIsLit = True; nextIsLit = True;
} }
} }
@@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
{ {
nextOpt->price = shortRepPrice; nextOpt->price = shortRepPrice;
nextOpt->len = 1; nextOpt->len = 1;
MakeAs_ShortRep(nextOpt); MakeAs_ShortRep(nextOpt)
nextIsLit = False; nextIsLit = False;
} }
} }
@@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
dist = MATCHES[(size_t)offs + 1]; dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances) // if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot); GetPosSlot2(dist, posSlot)
for (len = /*2*/ startLen; ; len++) for (len = /*2*/ startLen; ; len++)
{ {
@@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
break; break;
dist = MATCHES[(size_t)offs + 1]; dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances) // if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot); GetPosSlot2(dist, posSlot)
} }
} }
} }
@@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
{ {
UInt32 ttt, newBound; UInt32 ttt, newBound;
RC_BIT_PRE(p, probs + m) RC_BIT_PRE(p, probs + m)
RC_BIT_1(&p->rc, probs + m); RC_BIT_1(&p->rc, probs + m)
m = (m << 1) + 1; m = (m << 1) + 1;
} }
while (m < (1 << kNumPosSlotBits)); while (m < (1 << kNumPosSlotBits));
@@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
{ {
UInt32 ttt, newBound; UInt32 ttt, newBound;
RC_BIT_PRE(p, probs + m) RC_BIT_PRE(p, probs + m)
RC_BIT_1(&p->rc, probs + m); RC_BIT_1(&p->rc, probs + m)
m = (m << 1) + 1; m = (m << 1) + 1;
} }
while (m < kAlignTableSize); while (m < kAlignTableSize);
@@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p)
if (p->rc.res != SZ_OK) if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE; p->result = SZ_ERROR_WRITE;
#ifndef _7ZIP_ST #ifndef Z7_ST
if ( if (
// p->mf_Failure || // p->mf_Failure ||
(p->mtMode && (p->mtMode &&
@@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p)
p->matchFinderMt.failure_LZ_BT)) p->matchFinderMt.failure_LZ_BT))
) )
{ {
p->result = MY_HRES_ERROR__INTERNAL_ERROR; p->result = MY_HRES_ERROR_INTERNAL_ERROR;
// printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
} }
#endif #endif
@@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p)
} }
MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
{ {
/* ReleaseMFStream(); */ /* ReleaseMFStream(); */
p->finished = True; p->finished = True;
@@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
} }
MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
{ {
unsigned i; unsigned i;
const CProbPrice *ProbPrices = p->ProbPrices; const CProbPrice *ProbPrices = p->ProbPrices;
@@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
} }
MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
{ {
// int y; for (y = 0; y < 100; y++) { // int y; for (y = 0; y < 100; y++) {
@@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
RangeEnc_Construct(&p->rc); RangeEnc_Construct(&p->rc);
MatchFinder_Construct(&MFB); MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST #ifndef Z7_ST
p->matchFinderMt.MatchFinder = &MFB; p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt); MatchFinderMt_Construct(&p->matchFinderMt);
#endif #endif
@@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
{ {
CLzmaEncProps props; CLzmaEncProps props;
LzmaEncProps_Init(&props); LzmaEncProps_Init(&props);
LzmaEnc_SetProps(p, &props); LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
} }
#ifndef LZMA_LOG_BSR #ifndef LZMA_LOG_BSR
@@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif #endif
@@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); // GET_CLzmaEnc_p
LzmaEnc_Destruct(p, alloc, allocBig);
ISzAlloc_Free(alloc, p); ISzAlloc_Free(alloc, p);
} }
MY_NO_INLINE Z7_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
{ {
UInt32 nowPos32, startPos32; UInt32 nowPos32, startPos32;
if (p->needInit) if (p->needInit)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtMode) if (p->mtMode)
{ {
RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
} }
#endif #endif
p->matchFinder.Init(p->matchFinderObj); p->matchFinder.Init(p->matchFinderObj);
@@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
if (p->finished) if (p->finished)
return p->result; return p->result;
RINOK(CheckErrors(p)); RINOK(CheckErrors(p))
nowPos32 = (UInt32)p->nowPos64; nowPos32 = (UInt32)p->nowPos64;
startPos32 = nowPos32; startPos32 = nowPos32;
@@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
const Byte *data; const Byte *data;
unsigned state; unsigned state;
RC_BIT_0(&p->rc, probs); RC_BIT_0(&p->rc, probs)
p->rc.range = range; p->rc.range = range;
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
probs = LIT_PROBS(nowPos32, *(data - 1)); probs = LIT_PROBS(nowPos32, *(data - 1));
@@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
} }
else else
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRep[p->state]; probs = &p->isRep[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist < LZMA_NUM_REPS) if (dist < LZMA_NUM_REPS)
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRepG0[p->state]; probs = &p->isRepG0[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist == 0) if (dist == 0)
{ {
RC_BIT_0(&p->rc, probs); RC_BIT_0(&p->rc, probs)
probs = &p->isRep0Long[p->state][posState]; probs = &p->isRep0Long[p->state][posState];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (len != 1) if (len != 1)
{ {
RC_BIT_1_BASE(&p->rc, probs); RC_BIT_1_BASE(&p->rc, probs)
} }
else else
{ {
RC_BIT_0_BASE(&p->rc, probs); RC_BIT_0_BASE(&p->rc, probs)
p->state = kShortRepNextStates[p->state]; p->state = kShortRepNextStates[p->state];
} }
} }
else else
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRepG1[p->state]; probs = &p->isRepG1[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist == 1) if (dist == 1)
{ {
RC_BIT_0_BASE(&p->rc, probs); RC_BIT_0_BASE(&p->rc, probs)
dist = p->reps[1]; dist = p->reps[1];
} }
else else
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRepG2[p->state]; probs = &p->isRepG2[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist == 2) if (dist == 2)
{ {
RC_BIT_0_BASE(&p->rc, probs); RC_BIT_0_BASE(&p->rc, probs)
dist = p->reps[2]; dist = p->reps[2];
} }
else else
{ {
RC_BIT_1_BASE(&p->rc, probs); RC_BIT_1_BASE(&p->rc, probs)
dist = p->reps[3]; dist = p->reps[3];
p->reps[3] = p->reps[2]; p->reps[3] = p->reps[2];
} }
@@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
else else
{ {
unsigned posSlot; unsigned posSlot;
RC_BIT_0(&p->rc, probs); RC_BIT_0(&p->rc, probs)
p->rc.range = range; p->rc.range = range;
p->state = kMatchNextStates[p->state]; p->state = kMatchNextStates[p->state];
@@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
p->reps[0] = dist + 1; p->reps[0] = dist + 1;
p->matchPriceCount++; p->matchPriceCount++;
GetPosSlot(dist, posSlot); GetPosSlot(dist, posSlot)
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
{ {
UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
@@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
sym <<= 1; sym <<= 1;
RC_BIT(&p->rc, prob, bit); RC_BIT(&p->rc, prob, bit)
} }
while (sym < (1 << kNumPosSlotBits * 2)); while (sym < (1 << kNumPosSlotBits * 2));
p->rc.range = range; p->rc.range = range;
@@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
{ {
unsigned m = 1; unsigned m = 1;
unsigned bit; unsigned bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
p->rc.range = range; p->rc.range = range;
// p->alignPriceCount++; // p->alignPriceCount++;
} }
@@ -2704,7 +2691,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
if (!RangeEnc_Alloc(&p->rc, alloc)) if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
#ifndef _7ZIP_ST #ifndef Z7_ST
p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif #endif
@@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
(numFastBytes + LZMA_MATCH_LEN_MAX + 1) (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
*/ */
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtMode) if (p->mtMode)
{ {
RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig)); , allocBig))
p->matchFinderObj = &p->matchFinderMt; p->matchFinderObj = &p->matchFinderMt;
MFB.bigHash = (Byte)( MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
(p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
} }
else else
@@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr
p->finished = False; p->finished = False;
p->result = SZ_OK; p->result = SZ_OK;
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); p->nowPos64 = 0;
p->needInit = 1;
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
LzmaEnc_Init(p); LzmaEnc_Init(p);
LzmaEnc_InitPrices(p); LzmaEnc_InitPrices(p);
p->nowPos64 = 0;
return SZ_OK; return SZ_OK;
} }
static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
ISeqOutStreamPtr outStream,
ISeqInStreamPtr inStream,
ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
MFB.stream = inStream; MatchFinder_SET_STREAM(&MFB, inStream)
p->needInit = 1;
p->rc.outStream = outStream; p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
} }
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
ISeqInStream *inStream, UInt32 keepWindowSize, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
MFB.stream = inStream; MatchFinder_SET_STREAM(&MFB, inStream)
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
} }
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
const Byte *src, SizeT srcLen,
UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
MFB.directInput = 1; // GET_CLzmaEnc_p
MFB.bufferBase = (Byte *)src; MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
MFB.directInputRem = srcLen; LzmaEnc_SetDataSize(p, srcLen);
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
LzmaEnc_SetInputBuf(p, src, srcLen);
p->needInit = 1;
LzmaEnc_SetDataSize(pp, srcLen);
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
} }
void LzmaEnc_Finish(CLzmaEncHandle pp) void LzmaEnc_Finish(CLzmaEncHandle p)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
if (p->mtMode) if (p->mtMode)
MatchFinderMt_ReleaseStream(&p->matchFinderMt); MatchFinderMt_ReleaseStream(&p->matchFinderMt);
#else #else
UNUSED_VAR(pp); UNUSED_VAR(p)
#endif #endif
} }
@@ -2933,13 +2913,13 @@ typedef struct
{ {
ISeqOutStream vt; ISeqOutStream vt;
Byte *data; Byte *data;
SizeT rem; size_t rem;
BoolInt overflow; BoolInt overflow;
} CLzmaEnc_SeqOutStreamBuf; } CLzmaEnc_SeqOutStreamBuf;
static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
{ {
CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
if (p->rem < size) if (p->rem < size)
{ {
size = p->rem; size = p->rem;
@@ -2956,24 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
/* /*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
{ {
const CLzmaEnc *p = (CLzmaEnc *)pp; GET_const_CLzmaEnc_p
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
} }
*/ */
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
{ {
const CLzmaEnc *p = (CLzmaEnc *)pp; // GET_const_CLzmaEnc_p
return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
} }
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, // (desiredPackSize == 0) is not allowed
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
UInt64 nowPos64; UInt64 nowPos64;
SRes res; SRes res;
CLzmaEnc_SeqOutStreamBuf outStream; CLzmaEnc_SeqOutStreamBuf outStream;
@@ -2990,14 +2971,10 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
if (reInit) if (reInit)
LzmaEnc_Init(p); LzmaEnc_Init(p);
LzmaEnc_InitPrices(p); LzmaEnc_InitPrices(p);
nowPos64 = p->nowPos64;
RangeEnc_Init(&p->rc); RangeEnc_Init(&p->rc);
p->rc.outStream = &outStream.vt; p->rc.outStream = &outStream.vt;
nowPos64 = p->nowPos64;
if (desiredPackSize == 0)
return SZ_ERROR_OUTPUT_EOF;
res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
*unpackSize = (UInt32)(p->nowPos64 - nowPos64); *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
@@ -3009,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
} }
MY_NO_INLINE Z7_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
{ {
SRes res = SZ_OK; SRes res = SZ_OK;
#ifndef _7ZIP_ST #ifndef Z7_ST
Byte allocaDummy[0x300]; Byte allocaDummy[0x300];
allocaDummy[0] = 0; allocaDummy[0] = 0;
allocaDummy[1] = allocaDummy[0]; allocaDummy[1] = allocaDummy[0];
@@ -3036,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
} }
} }
LzmaEnc_Finish(p); LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
/* /*
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
@@ -3048,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
} }
SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); // GET_CLzmaEnc_p
return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
return LzmaEnc_Encode2(p, progress);
} }
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
{ {
if (*size < LZMA_PROPS_SIZE) if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM; return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE; *size = LZMA_PROPS_SIZE;
{ {
const CLzmaEnc *p = (const CLzmaEnc *)pp; // GET_CLzmaEnc_p
const UInt32 dictSize = p->dictSize; const UInt32 dictSize = p->dictSize;
UInt32 v; UInt32 v;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
@@ -3086,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
while (v < dictSize); while (v < dictSize);
} }
SetUi32(props + 1, v); SetUi32(props + 1, v)
return SZ_OK; return SZ_OK;
} }
} }
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
{ {
return (unsigned)((CLzmaEnc *)pp)->writeEndMark; // GET_CLzmaEnc_p
return (unsigned)p->writeEndMark;
} }
SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
SRes res; SRes res;
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
CLzmaEnc_SeqOutStreamBuf outStream; CLzmaEnc_SeqOutStreamBuf outStream;
@@ -3114,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
p->writeEndMark = writeEndMark; p->writeEndMark = writeEndMark;
p->rc.outStream = &outStream.vt; p->rc.outStream = &outStream.vt;
res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
if (res == SZ_OK) if (res == SZ_OK)
{ {
@@ -3123,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
res = SZ_ERROR_FAIL; res = SZ_ERROR_FAIL;
} }
*destLen -= outStream.rem; *destLen -= (SizeT)outStream.rem;
if (outStream.overflow) if (outStream.overflow)
return SZ_ERROR_OUTPUT_EOF; return SZ_ERROR_OUTPUT_EOF;
return res; return res;
@@ -3132,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); CLzmaEncHandle p = LzmaEnc_Create(alloc);
SRes res; SRes res;
if (!p) if (!p)
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
@@ -3154,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
/* /*
#ifndef _7ZIP_ST #ifndef Z7_ST
void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
{ {
const CLzmaEnc *p = (CLzmaEnc *)pp; GET_const_CLzmaEnc_p
lz_threads[0] = p->matchFinderMt.hashSync.thread; lz_threads[0] = p->matchFinderMt.hashSync.thread;
lz_threads[1] = p->matchFinderMt.btSync.thread; lz_threads[1] = p->matchFinderMt.btSync.thread;
} }

View File

@@ -1,8 +1,8 @@
/* LzmaEnc.h -- LZMA Encoder /* LzmaEnc.h -- LZMA Encoder
2019-10-30 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H #ifndef ZIP7_INC_LZMA_ENC_H
#define __LZMA_ENC_H #define ZIP7_INC_LZMA_ENC_H
#include "7zTypes.h" #include "7zTypes.h"
@@ -10,7 +10,7 @@ EXTERN_C_BEGIN
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
typedef struct _CLzmaEncProps typedef struct
{ {
int level; /* 0 <= level <= 9 */ int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
@@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps
int fb; /* 5 <= fb <= 273, default = 32 */ int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */ int numHashBytes; /* 2, 3 or 4, default = 4 */
unsigned numHashOutBits; /* default = ? */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */ int numThreads; /* 1 or 2, default = 2 */
// int _pad;
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */ Encoder uses this value to reduce dictionary size */
@@ -51,7 +54,9 @@ SRes:
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/ */
typedef void * CLzmaEncHandle; typedef struct CLzmaEnc CLzmaEnc;
typedef CLzmaEnc * CLzmaEncHandle;
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
@@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* ---------- One Call Interface ---------- */ /* ---------- One Call Interface ---------- */
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END EXTERN_C_END

View File

@@ -1,12 +1,14 @@
/* LzmaLib.c -- LZMA library wrapper /* LzmaLib.c -- LZMA library wrapper
2015-06-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Alloc.h" #include "Alloc.h"
#include "LzmaDec.h" #include "LzmaDec.h"
#include "LzmaEnc.h" #include "LzmaEnc.h"
#include "LzmaLib.h" #include "LzmaLib.h"
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, unsigned char *outProps, size_t *outPropsSize,
int level, /* 0 <= level <= 9, default = 5 */ int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
@@ -32,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
} }
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
const unsigned char *props, size_t propsSize) const unsigned char *props, size_t propsSize)
{ {
ELzmaStatus status; ELzmaStatus status;

View File

@@ -1,14 +1,14 @@
/* LzmaLib.h -- LZMA library interface /* LzmaLib.h -- LZMA library interface
2021-04-03 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H #ifndef ZIP7_INC_LZMA_LIB_H
#define __LZMA_LIB_H #define ZIP7_INC_LZMA_LIB_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
#define MY_STDAPI int MY_STD_CALL #define Z7_STDAPI int Z7_STDCALL
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
@@ -100,7 +100,7 @@ Returns:
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/ */
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
int level, /* 0 <= level <= 9, default = 5 */ int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* default = (1 << 24) */ unsigned dictSize, /* default = (1 << 24) */
@@ -130,7 +130,7 @@ Returns:
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
*/ */
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
const unsigned char *props, size_t propsSize); const unsigned char *props, size_t propsSize);
EXTERN_C_END EXTERN_C_END

View File

@@ -1,8 +1,8 @@
/* Precomp.h -- StdAfx /* Precomp.h -- StdAfx
2013-11-12 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H #ifndef ZIP7_INC_PRECOMP_H
#define __7Z_PRECOMP_H #define ZIP7_INC_PRECOMP_H
#include "Compiler.h" #include "Compiler.h"
/* #include "7zTypes.h" */ /* #include "7zTypes.h" */

View File

@@ -1,5 +1,5 @@
/* Threads.c -- multithreading library /* Threads.c -- multithreading library
2021-12-21 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@@ -11,9 +11,9 @@
#include "Threads.h" #include "Threads.h"
static WRes GetError() static WRes GetError(void)
{ {
DWORD res = GetLastError(); const DWORD res = GetLastError();
return res ? (WRes)res : 1; return res ? (WRes)res : 1;
} }
@@ -173,6 +173,9 @@ WRes CriticalSection_Init(CCriticalSection *p)
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
Windows Vista+ : no exceptions */ Windows Vista+ : no exceptions */
#ifdef _MSC_VER #ifdef _MSC_VER
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
__try __try
#endif #endif
{ {
@@ -193,18 +196,26 @@ WRes CriticalSection_Init(CCriticalSection *p)
// ---------- POSIX ---------- // ---------- POSIX ----------
#ifndef __APPLE__ #ifndef __APPLE__
#ifndef _7ZIP_AFFINITY_DISABLE #ifndef Z7_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
// clang < 3.6 : unknown warning group '-Wreserved-id-macro'
// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
// clang >= 13 : do not give warning
#if !defined(_GNU_SOURCE)
#if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12)
#pragma GCC diagnostic ignored "-Wreserved-id-macro"
#endif
#define _GNU_SOURCE #define _GNU_SOURCE
#endif #endif // !defined(_GNU_SOURCE)
#endif #endif // Z7_AFFINITY_DISABLE
#endif // __APPLE__
#include "Threads.h" #include "Threads.h"
#include <errno.h> #include <errno.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#ifdef _7ZIP_AFFINITY_SUPPORTED #ifdef Z7_AFFINITY_SUPPORTED
// #include <sched.h> // #include <sched.h>
#endif #endif
@@ -212,15 +223,12 @@ WRes CriticalSection_Init(CCriticalSection *p)
// #include <stdio.h> // #include <stdio.h>
// #define PRF(p) p // #define PRF(p) p
#define PRF(p) #define PRF(p)
#define Print(s) PRF(printf("\n%s\n", s);)
#define Print(s) PRF(printf("\n%s\n", s))
// #include <stdio.h>
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
{ {
// new thread in Posix probably inherits affinity from parrent thread // new thread in Posix probably inherits affinity from parrent thread
Print("Thread_Create_With_CpuSet"); Print("Thread_Create_With_CpuSet")
pthread_attr_t attr; pthread_attr_t attr;
int ret; int ret;
@@ -228,7 +236,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
p->_created = 0; p->_created = 0;
RINOK(pthread_attr_init(&attr)); RINOK(pthread_attr_init(&attr))
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
@@ -236,7 +244,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
{ {
if (cpuSet) if (cpuSet)
{ {
#ifdef _7ZIP_AFFINITY_SUPPORTED #ifdef Z7_AFFINITY_SUPPORTED
/* /*
printf("\n affinity :"); printf("\n affinity :");
@@ -292,7 +300,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{ {
Print("Thread_Create_WithAffinity"); Print("Thread_Create_WithAffinity")
CCpuSet cs; CCpuSet cs;
unsigned i; unsigned i;
CpuSet_Zero(&cs); CpuSet_Zero(&cs);
@@ -312,7 +320,7 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param
WRes Thread_Close(CThread *p) WRes Thread_Close(CThread *p)
{ {
// Print("Thread_Close"); // Print("Thread_Close")
int ret; int ret;
if (!p->_created) if (!p->_created)
return 0; return 0;
@@ -326,7 +334,7 @@ WRes Thread_Close(CThread *p)
WRes Thread_Wait_Close(CThread *p) WRes Thread_Wait_Close(CThread *p)
{ {
// Print("Thread_Wait_Close"); // Print("Thread_Wait_Close")
void *thread_return; void *thread_return;
int ret; int ret;
if (!p->_created) if (!p->_created)
@@ -343,8 +351,8 @@ WRes Thread_Wait_Close(CThread *p)
static WRes Event_Create(CEvent *p, int manualReset, int signaled) static WRes Event_Create(CEvent *p, int manualReset, int signaled)
{ {
RINOK(pthread_mutex_init(&p->_mutex, NULL)); RINOK(pthread_mutex_init(&p->_mutex, NULL))
RINOK(pthread_cond_init(&p->_cond, NULL)); RINOK(pthread_cond_init(&p->_cond, NULL))
p->_manual_reset = manualReset; p->_manual_reset = manualReset;
p->_state = (signaled ? True : False); p->_state = (signaled ? True : False);
p->_created = 1; p->_created = 1;
@@ -363,7 +371,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
WRes Event_Set(CEvent *p) WRes Event_Set(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = True; p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond); int res1 = pthread_cond_broadcast(&p->_cond);
int res2 = pthread_mutex_unlock(&p->_mutex); int res2 = pthread_mutex_unlock(&p->_mutex);
@@ -372,14 +380,14 @@ WRes Event_Set(CEvent *p)
WRes Event_Reset(CEvent *p) WRes Event_Reset(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = False; p->_state = False;
return pthread_mutex_unlock(&p->_mutex); return pthread_mutex_unlock(&p->_mutex);
} }
WRes Event_Wait(CEvent *p) WRes Event_Wait(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
while (p->_state == False) while (p->_state == False)
{ {
// ETIMEDOUT // ETIMEDOUT
@@ -411,8 +419,8 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{ {
if (initCount > maxCount || maxCount < 1) if (initCount > maxCount || maxCount < 1)
return EINVAL; return EINVAL;
RINOK(pthread_mutex_init(&p->_mutex, NULL)); RINOK(pthread_mutex_init(&p->_mutex, NULL))
RINOK(pthread_cond_init(&p->_cond, NULL)); RINOK(pthread_cond_init(&p->_cond, NULL))
p->_count = initCount; p->_count = initCount;
p->_maxCount = maxCount; p->_maxCount = maxCount;
p->_created = 1; p->_created = 1;
@@ -448,7 +456,7 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
if (releaseCount < 1) if (releaseCount < 1)
return EINVAL; return EINVAL;
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
newCount = p->_count + releaseCount; newCount = p->_count + releaseCount;
if (newCount > p->_maxCount) if (newCount > p->_maxCount)
@@ -458,13 +466,13 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
p->_count = newCount; p->_count = newCount;
ret = pthread_cond_broadcast(&p->_cond); ret = pthread_cond_broadcast(&p->_cond);
} }
RINOK(pthread_mutex_unlock(&p->_mutex)); RINOK(pthread_mutex_unlock(&p->_mutex))
return ret; return ret;
} }
WRes Semaphore_Wait(CSemaphore *p) WRes Semaphore_Wait(CSemaphore *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
while (p->_count < 1) while (p->_count < 1)
{ {
pthread_cond_wait(&p->_cond, &p->_mutex); pthread_cond_wait(&p->_cond, &p->_mutex);
@@ -489,7 +497,7 @@ WRes Semaphore_Close(CSemaphore *p)
WRes CriticalSection_Init(CCriticalSection *p) WRes CriticalSection_Init(CCriticalSection *p)
{ {
// Print("CriticalSection_Init"); // Print("CriticalSection_Init")
if (!p) if (!p)
return EINTR; return EINTR;
return pthread_mutex_init(&p->_mutex, NULL); return pthread_mutex_init(&p->_mutex, NULL);
@@ -497,7 +505,7 @@ WRes CriticalSection_Init(CCriticalSection *p)
void CriticalSection_Enter(CCriticalSection *p) void CriticalSection_Enter(CCriticalSection *p)
{ {
// Print("CriticalSection_Enter"); // Print("CriticalSection_Enter")
if (p) if (p)
{ {
// int ret = // int ret =
@@ -507,7 +515,7 @@ void CriticalSection_Enter(CCriticalSection *p)
void CriticalSection_Leave(CCriticalSection *p) void CriticalSection_Leave(CCriticalSection *p)
{ {
// Print("CriticalSection_Leave"); // Print("CriticalSection_Leave")
if (p) if (p)
{ {
// int ret = // int ret =
@@ -517,7 +525,7 @@ void CriticalSection_Leave(CCriticalSection *p)
void CriticalSection_Delete(CCriticalSection *p) void CriticalSection_Delete(CCriticalSection *p)
{ {
// Print("CriticalSection_Delete"); // Print("CriticalSection_Delete")
if (p) if (p)
{ {
// int ret = // int ret =
@@ -527,14 +535,28 @@ void CriticalSection_Delete(CCriticalSection *p)
LONG InterlockedIncrement(LONG volatile *addend) LONG InterlockedIncrement(LONG volatile *addend)
{ {
// Print("InterlockedIncrement"); // Print("InterlockedIncrement")
#ifdef USE_HACK_UNSAFE_ATOMIC #ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend + 1; LONG val = *addend + 1;
*addend = val; *addend = val;
return val; return val;
#else #else
#if defined(__clang__) && (__clang_major__ >= 8)
#pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst"
#endif
return __sync_add_and_fetch(addend, 1); return __sync_add_and_fetch(addend, 1);
#endif #endif
} }
#endif // _WIN32 #endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
{
if (Event_IsCreated(p))
return Event_Reset(p);
return AutoResetEvent_CreateNotSignaled(p);
}
#undef PRF
#undef Print

View File

@@ -1,18 +1,19 @@
/* Threads.h -- multithreading library /* Threads.h -- multithreading library
2021-12-21 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H #ifndef ZIP7_INC_THREADS_H
#define __7Z_THREADS_H #define ZIP7_INC_THREADS_H
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
#else #else
#if defined(__linux__) #if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef _7ZIP_AFFINITY_DISABLE #ifndef Z7_AFFINITY_DISABLE
#define _7ZIP_AFFINITY_SUPPORTED #define Z7_AFFINITY_SUPPORTED
// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED") // #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE // #define _GNU_SOURCE
#endif #endif
#endif #endif
@@ -33,7 +34,7 @@ WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread; typedef HANDLE CThread;
#define Thread_Construct(p) { *(p) = NULL; } #define Thread_CONSTRUCT(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL) #define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p) #define Thread_Close(p) HandlePtr_Close(p)
// #define Thread_Wait(p) Handle_WaitObject(*(p)) // #define Thread_Wait(p) Handle_WaitObject(*(p))
@@ -52,42 +53,46 @@ typedef
#endif #endif
THREAD_FUNC_RET_TYPE; THREAD_FUNC_RET_TYPE;
#define THREAD_FUNC_RET_ZERO 0
typedef DWORD_PTR CAffinityMask; typedef DWORD_PTR CAffinityMask;
typedef DWORD_PTR CCpuSet; typedef DWORD_PTR CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; } #define CpuSet_Zero(p) *(p) = (0)
#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } #define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu))
#else // _WIN32 #else // _WIN32
typedef struct _CThread typedef struct
{ {
pthread_t _tid; pthread_t _tid;
int _created; int _created;
} CThread; } CThread;
#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } #define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; }
#define Thread_WasCreated(p) ((p)->_created != 0) #define Thread_WasCreated(p) ((p)->_created != 0)
WRes Thread_Close(CThread *p); WRes Thread_Close(CThread *p);
// #define Thread_Wait Thread_Wait_Close // #define Thread_Wait Thread_Wait_Close
typedef void * THREAD_FUNC_RET_TYPE; typedef void * THREAD_FUNC_RET_TYPE;
#define THREAD_FUNC_RET_ZERO NULL
typedef UInt64 CAffinityMask; typedef UInt64 CAffinityMask;
#ifdef _7ZIP_AFFINITY_SUPPORTED #ifdef Z7_AFFINITY_SUPPORTED
typedef cpu_set_t CCpuSet; typedef cpu_set_t CCpuSet;
#define CpuSet_Zero(p) CPU_ZERO(p) #define CpuSet_Zero(p) CPU_ZERO(p)
#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) #define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) #define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
#else #else
typedef UInt64 CCpuSet; typedef UInt64 CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; } #define CpuSet_Zero(p) *(p) = (0)
#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } #define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu))
#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) #define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
#endif #endif
@@ -95,7 +100,7 @@ typedef UInt64 CCpuSet;
#endif // _WIN32 #endif // _WIN32
#define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_CALL_TYPE Z7_STDCALL
#if defined(_WIN32) && defined(__GNUC__) #if defined(_WIN32) && defined(__GNUC__)
/* GCC compiler for x86 32-bit uses the rule: /* GCC compiler for x86 32-bit uses the rule:
@@ -187,6 +192,7 @@ WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
WRes Event_Set(CEvent *p); WRes Event_Set(CEvent *p);
WRes Event_Reset(CEvent *p); WRes Event_Reset(CEvent *p);
WRes Event_Wait(CEvent *p); WRes Event_Wait(CEvent *p);
@@ -227,6 +233,8 @@ LONG InterlockedIncrement(LONG volatile *addend);
#endif // _WIN32 #endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p);
EXTERN_C_END EXTERN_C_END
#endif #endif

View File

@@ -1,6 +1,46 @@
HISTORY of the LZMA SDK HISTORY of the LZMA SDK
----------------------- -----------------------
23.01 2023-06-20
-------------------------
- 7-Zip now can use new ARM64 filter for compression to 7z and xz archives.
ARM64 filter can increase compression ratio for data containing executable
files compiled for ARM64 (AArch64) architecture.
Also 7-Zip now parses executable files (that have exe and dll filename extensions)
before compressing, and it selects appropriate filter for each parsed file:
- BCJ or BCJ2 filter for x86 executable files,
- ARM64 filter for ARM64 executable files.
Previous versions by default used x86 filter BCJ or BCJ2 for all exe/dll files.
- Default section size for BCJ2 filter was changed from 64 MiB to 240 MiB.
It can increase compression ratio for executable files larger than 64 MiB.
- Some optimizations in filters code: BCJ, BCJ2, Swap* and opthers.
- If 7-Zip uses BCJ2 filter for big datasets compressing, it can use additional temp
files in system's TEMP folder. 7-Zip uses temp file for additional compressed
data stream, if size of such compressed stream is larger than predefined limit:
16 MiB in 32-bit version, 4 GiB in 64-bit version.
- When new 7-Zip creates multivolume archive, 7-Zip keeps in open state
only volumes that still can be changed. Previous versions kept all volumes
in open state until the end of the archive creation.
- 7-Zip for Linux and macOS now can reduce the number of simultaneously open files,
when 7-Zip opens, extracts or creates multivolume archive. It allows to avoid
the failures for cases with big number of volumes, bacause there is a limitation
for number of open files allowed for a single program in Linux and macOS.
- Some bugs were fixed.
- Source code changes:
- All external macros for compiling C/C++ code of 7-Zip now have Z7_ prefix.
- 7-Zip COM interfaces now use new macros that allow to declare and implement COM interface.
- The code has been modified to compile with the maximum diagnostic warning level:
-Wall in MSVC and -Weverything in CLANG.
And some warning types are disabled in 2 files:
- C/Compiler.h for C/C++ code warnings.
- CPP/Common/Common.h for C++ code warnings.
- Linux/macOS versions of 7-Zip: IUnknown interface in new code doesn't use
virtual destructor that was used in previous 7-Zip and p7zip:
// virtual ~IUnknown() {}
So 7-Zip's dynamically linked shared libraries (codecs) are not compatible
between new 7-Zip for Linux/macOS and old 7-Zip (and p7zip).
21.07 2021-12-26 21.07 2021-12-26
------------------------- -------------------------
- New switches: -spm and -im!{file_path} to exclude directories from processing - New switches: -spm and -im!{file_path} to exclude directories from processing

View File

@@ -1,4 +1,4 @@
LZMA SDK 21.07 LZMA SDK 23.01
-------------- --------------
LZMA SDK provides the documentation, samples, header files, LZMA SDK provides the documentation, samples, header files,