mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-11 17:28:36 +12:00
Update 7z to 23.01
This commit is contained in:
parent
98b7642232
commit
00a80adfae
100 changed files with 11232 additions and 3906 deletions
519
3rdparty/7z/src/LzFind.c
vendored
519
3rdparty/7z/src/LzFind.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/* LzFind.c -- Match finder for LZ algorithms
|
||||
2021-11-29 : Igor Pavlov : Public domain */
|
||||
2023-03-14 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
|||
#define kEmptyHashValue 0
|
||||
|
||||
#define kMaxValForNormalize ((UInt32)0)
|
||||
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
|
||||
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
|
||||
|
||||
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
|
||||
|
||||
|
@ -67,10 +67,10 @@
|
|||
|
||||
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
|
||||
{
|
||||
if (!p->directInput)
|
||||
// if (!p->directInput)
|
||||
{
|
||||
ISzAlloc_Free(alloc, p->bufferBase);
|
||||
p->bufferBase = NULL;
|
||||
ISzAlloc_Free(alloc, p->bufBase);
|
||||
p->bufBase = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
|
|||
{
|
||||
if (blockSize == 0)
|
||||
return 0;
|
||||
if (!p->bufferBase || p->blockSize != blockSize)
|
||||
if (!p->bufBase || p->blockSize != blockSize)
|
||||
{
|
||||
// size_t blockSizeT;
|
||||
LzInWindow_Free(p, alloc);
|
||||
|
@ -101,11 +101,11 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
|
|||
#endif
|
||||
*/
|
||||
|
||||
p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
|
||||
// printf("\nbufferBase = %p\n", p->bufferBase);
|
||||
p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
|
||||
// printf("\nbufferBase = %p\n", p->bufBase);
|
||||
// return 0; // for debug
|
||||
}
|
||||
return (p->bufferBase != NULL);
|
||||
return (p->bufBase != NULL);
|
||||
}
|
||||
|
||||
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
|
||||
|
@ -113,7 +113,7 @@ static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return
|
|||
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void MatchFinder_ReadBlock(CMatchFinder *p)
|
||||
{
|
||||
if (p->streamEndWasReached || p->result != SZ_OK)
|
||||
|
@ -127,8 +127,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
|
|||
UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
|
||||
if (curSize > p->directInputRem)
|
||||
curSize = (UInt32)p->directInputRem;
|
||||
p->directInputRem -= curSize;
|
||||
p->streamPos += curSize;
|
||||
p->directInputRem -= curSize;
|
||||
if (p->directInputRem == 0)
|
||||
p->streamEndWasReached = 1;
|
||||
return;
|
||||
|
@ -136,8 +136,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
|
|||
|
||||
for (;;)
|
||||
{
|
||||
Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
|
||||
size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
|
||||
const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
|
||||
size_t size = (size_t)(p->bufBase + p->blockSize - dest);
|
||||
if (size == 0)
|
||||
{
|
||||
/* we call ReadBlock() after NeedMove() and MoveBlock().
|
||||
|
@ -153,7 +153,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
|
|||
// #define kRead 3
|
||||
// if (size > kRead) size = kRead; // for debug
|
||||
|
||||
p->result = ISeqInStream_Read(p->stream, dest, &size);
|
||||
/*
|
||||
// we need cast (Byte *)dest.
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic ignored "-Wcast-qual"
|
||||
#endif
|
||||
*/
|
||||
p->result = ISeqInStream_Read(p->stream,
|
||||
p->bufBase + (dest - p->bufBase), &size);
|
||||
if (p->result != SZ_OK)
|
||||
return;
|
||||
if (size == 0)
|
||||
|
@ -173,14 +180,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
|
|||
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
void MatchFinder_MoveBlock(CMatchFinder *p)
|
||||
{
|
||||
const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
|
||||
const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
|
||||
const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
|
||||
p->buffer = p->bufferBase + keepBefore;
|
||||
memmove(p->bufferBase,
|
||||
p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
|
||||
p->buffer = p->bufBase + keepBefore;
|
||||
memmove(p->bufBase,
|
||||
p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
|
||||
keepBefore + (size_t)GET_AVAIL_BYTES(p));
|
||||
}
|
||||
|
||||
|
@ -198,7 +205,7 @@ int MatchFinder_NeedMove(CMatchFinder *p)
|
|||
return 0;
|
||||
if (p->streamEndWasReached || p->result != SZ_OK)
|
||||
return 0;
|
||||
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
|
||||
return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
|
||||
}
|
||||
|
||||
void MatchFinder_ReadIfRequired(CMatchFinder *p)
|
||||
|
@ -214,6 +221,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
|
|||
p->cutValue = 32;
|
||||
p->btMode = 1;
|
||||
p->numHashBytes = 4;
|
||||
p->numHashBytes_Min = 2;
|
||||
p->numHashOutBits = 0;
|
||||
p->bigHash = 0;
|
||||
}
|
||||
|
||||
|
@ -222,8 +231,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
|
|||
void MatchFinder_Construct(CMatchFinder *p)
|
||||
{
|
||||
unsigned i;
|
||||
p->bufferBase = NULL;
|
||||
p->buffer = NULL;
|
||||
p->bufBase = NULL;
|
||||
p->directInput = 0;
|
||||
p->stream = NULL;
|
||||
p->hash = NULL;
|
||||
p->expectedDataSize = (UInt64)(Int64)-1;
|
||||
MatchFinder_SetDefaultSettings(p);
|
||||
|
@ -238,6 +249,8 @@ void MatchFinder_Construct(CMatchFinder *p)
|
|||
}
|
||||
}
|
||||
|
||||
#undef kCrcPoly
|
||||
|
||||
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
|
||||
{
|
||||
ISzAlloc_Free(alloc, p->hash);
|
||||
|
@ -252,7 +265,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
|
|||
|
||||
static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
|
||||
{
|
||||
size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
|
||||
const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
|
||||
if (sizeInBytes / sizeof(CLzRef) != num)
|
||||
return NULL;
|
||||
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
|
||||
|
@ -298,6 +311,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
|
|||
}
|
||||
|
||||
|
||||
// input is historySize
|
||||
static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
|
||||
{
|
||||
if (p->numHashBytes == 2)
|
||||
return (1 << 16) - 1;
|
||||
if (hs != 0)
|
||||
hs--;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
// we propagated 16 bits in (hs). Low 16 bits must be set later
|
||||
if (hs >= (1 << 24))
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
|
||||
}
|
||||
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
|
||||
hs |= (1 << 16) - 1; /* don't change it! */
|
||||
// bt5: we adjust the size with recommended minimum size
|
||||
if (p->numHashBytes >= 5)
|
||||
hs |= (256 << kLzHash_CrcShift_2) - 1;
|
||||
return hs;
|
||||
}
|
||||
|
||||
// input is historySize
|
||||
static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
|
||||
{
|
||||
if (p->numHashBytes == 2)
|
||||
return (1 << 16) - 1;
|
||||
if (hs != 0)
|
||||
hs--;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
// we propagated 16 bits in (hs). Low 16 bits must be set later
|
||||
hs >>= 1;
|
||||
if (hs >= (1 << 24))
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
else
|
||||
hs >>= 1;
|
||||
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
|
||||
}
|
||||
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
|
||||
hs |= (1 << 16) - 1; /* don't change it! */
|
||||
// bt5: we adjust the size with recommended minimum size
|
||||
if (p->numHashBytes >= 5)
|
||||
hs |= (256 << kLzHash_CrcShift_2) - 1;
|
||||
return hs;
|
||||
}
|
||||
|
||||
|
||||
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
||||
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
|
||||
ISzAllocPtr alloc)
|
||||
|
@ -318,78 +387,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
|||
p->blockSize = 0;
|
||||
if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
|
||||
{
|
||||
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
|
||||
UInt32 hs;
|
||||
p->matchMaxLen = matchMaxLen;
|
||||
size_t hashSizeSum;
|
||||
{
|
||||
// UInt32 hs4;
|
||||
p->fixedHashSize = 0;
|
||||
hs = (1 << 16) - 1;
|
||||
if (p->numHashBytes != 2)
|
||||
UInt32 hs;
|
||||
UInt32 hsCur;
|
||||
|
||||
if (p->numHashOutBits != 0)
|
||||
{
|
||||
hs = historySize;
|
||||
if (hs > p->expectedDataSize)
|
||||
hs = (UInt32)p->expectedDataSize;
|
||||
if (hs != 0)
|
||||
hs--;
|
||||
hs |= (hs >> 1);
|
||||
hs |= (hs >> 2);
|
||||
hs |= (hs >> 4);
|
||||
hs |= (hs >> 8);
|
||||
// we propagated 16 bits in (hs). Low 16 bits must be set later
|
||||
hs >>= 1;
|
||||
if (hs >= (1 << 24))
|
||||
{
|
||||
if (p->numHashBytes == 3)
|
||||
hs = (1 << 24) - 1;
|
||||
else
|
||||
hs >>= 1;
|
||||
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
|
||||
}
|
||||
|
||||
// hs = ((UInt32)1 << 25) - 1; // for test
|
||||
|
||||
unsigned numBits = p->numHashOutBits;
|
||||
const unsigned nbMax =
|
||||
(p->numHashBytes == 2 ? 16 :
|
||||
(p->numHashBytes == 3 ? 24 : 32));
|
||||
if (numBits > nbMax)
|
||||
numBits = nbMax;
|
||||
if (numBits >= 32)
|
||||
hs = (UInt32)0 - 1;
|
||||
else
|
||||
hs = ((UInt32)1 << numBits) - 1;
|
||||
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
|
||||
hs |= (1 << 16) - 1; /* don't change it! */
|
||||
|
||||
// bt5: we adjust the size with recommended minimum size
|
||||
if (p->numHashBytes >= 5)
|
||||
hs |= (256 << kLzHash_CrcShift_2) - 1;
|
||||
{
|
||||
const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
|
||||
if (hs > hs2)
|
||||
hs = hs2;
|
||||
}
|
||||
hsCur = hs;
|
||||
if (p->expectedDataSize < historySize)
|
||||
{
|
||||
const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
|
||||
if (hsCur > hs2)
|
||||
hsCur = hs2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
hs = MatchFinder_GetHashMask(p, historySize);
|
||||
hsCur = hs;
|
||||
if (p->expectedDataSize < historySize)
|
||||
{
|
||||
hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
|
||||
if (hsCur > hs) // is it possible?
|
||||
hsCur = hs;
|
||||
}
|
||||
}
|
||||
p->hashMask = hs;
|
||||
hs++;
|
||||
|
||||
/*
|
||||
hs4 = (1 << 20);
|
||||
if (hs4 > hs)
|
||||
hs4 = hs;
|
||||
// hs4 = (1 << 16); // for test
|
||||
p->hash4Mask = hs4 - 1;
|
||||
*/
|
||||
p->hashMask = hsCur;
|
||||
|
||||
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
|
||||
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
|
||||
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
|
||||
hs += p->fixedHashSize;
|
||||
hashSizeSum = hs;
|
||||
hashSizeSum++;
|
||||
if (hashSizeSum < hs)
|
||||
return 0;
|
||||
{
|
||||
UInt32 fixedHashSize = 0;
|
||||
if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
|
||||
if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
|
||||
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
|
||||
hashSizeSum += fixedHashSize;
|
||||
p->fixedHashSize = fixedHashSize;
|
||||
}
|
||||
}
|
||||
|
||||
p->matchMaxLen = matchMaxLen;
|
||||
|
||||
{
|
||||
size_t newSize;
|
||||
size_t numSons;
|
||||
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
|
||||
p->historySize = historySize;
|
||||
p->hashSizeSum = hs;
|
||||
p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
|
||||
|
||||
numSons = newCyclicBufferSize;
|
||||
if (p->btMode)
|
||||
numSons <<= 1;
|
||||
newSize = hs + numSons;
|
||||
newSize = hashSizeSum + numSons;
|
||||
|
||||
if (numSons < newCyclicBufferSize || newSize < numSons)
|
||||
return 0;
|
||||
|
||||
// aligned size is not required here, but it can be better for some loops
|
||||
#define NUM_REFS_ALIGN_MASK 0xF
|
||||
newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
|
||||
|
||||
if (p->hash && p->numRefs == newSize)
|
||||
// 22.02: we don't reallocate buffer, if old size is enough
|
||||
if (p->hash && p->numRefs >= newSize)
|
||||
return 1;
|
||||
|
||||
MatchFinder_FreeThisClassMemory(p, alloc);
|
||||
|
@ -398,7 +480,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
|||
|
||||
if (p->hash)
|
||||
{
|
||||
p->son = p->hash + p->hashSizeSum;
|
||||
p->son = p->hash + hashSizeSum;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -470,7 +552,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
|
|||
|
||||
void MatchFinder_Init_4(CMatchFinder *p)
|
||||
{
|
||||
p->buffer = p->bufferBase;
|
||||
if (!p->directInput)
|
||||
p->buffer = p->bufBase;
|
||||
{
|
||||
/* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
|
||||
the code in CMatchFinderMt expects (pos = 1) */
|
||||
|
@ -507,20 +590,20 @@ void MatchFinder_Init(CMatchFinder *p)
|
|||
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 8) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
|
||||
#define USE_SATUR_SUB_128
|
||||
#define USE_AVX2
|
||||
#define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
|
||||
#define ATTRIB_AVX2 __attribute__((__target__("avx2")))
|
||||
#if defined(__clang__) && (__clang_major__ >= 4) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
|
||||
// || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
|
||||
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#define USE_LZFIND_SATUR_SUB_256
|
||||
#define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
|
||||
#define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
|
||||
#elif defined(_MSC_VER)
|
||||
#if (_MSC_VER >= 1600)
|
||||
#define USE_SATUR_SUB_128
|
||||
#if (_MSC_VER >= 1900)
|
||||
#define USE_AVX2
|
||||
#include <immintrin.h> // avx
|
||||
#endif
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#endif
|
||||
#if (_MSC_VER >= 1900)
|
||||
#define USE_LZFIND_SATUR_SUB_256
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -529,16 +612,16 @@ void MatchFinder_Init(CMatchFinder *p)
|
|||
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 8)
|
||||
#define USE_SATUR_SUB_128
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#ifdef MY_CPU_ARM64
|
||||
// #define ATTRIB_SSE41 __attribute__((__target__("")))
|
||||
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
|
||||
#else
|
||||
// #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
|
||||
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
|
||||
#endif
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
#if (_MSC_VER >= 1910)
|
||||
#define USE_SATUR_SUB_128
|
||||
#define USE_LZFIND_SATUR_SUB_128
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -550,121 +633,130 @@ void MatchFinder_Init(CMatchFinder *p)
|
|||
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifndef ATTRIB_SSE41
|
||||
#define ATTRIB_SSE41
|
||||
#endif
|
||||
#ifndef ATTRIB_AVX2
|
||||
#define ATTRIB_AVX2
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef USE_SATUR_SUB_128
|
||||
#ifdef USE_LZFIND_SATUR_SUB_128
|
||||
|
||||
// #define _SHOW_HW_STATUS
|
||||
// #define Z7_SHOW_HW_STATUS
|
||||
|
||||
#ifdef _SHOW_HW_STATUS
|
||||
#ifdef Z7_SHOW_HW_STATUS
|
||||
#include <stdio.h>
|
||||
#define _PRF(x) x
|
||||
_PRF(;)
|
||||
#define PRF(x) x
|
||||
PRF(;)
|
||||
#else
|
||||
#define _PRF(x)
|
||||
#define PRF(x)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
|
||||
#ifdef MY_CPU_ARM64
|
||||
// #define FORCE_SATUR_SUB_128
|
||||
// #define FORCE_LZFIND_SATUR_SUB_128
|
||||
#endif
|
||||
typedef uint32x4_t LzFind_v128;
|
||||
#define SASUB_128_V(v, s) \
|
||||
vsubq_u32(vmaxq_u32(v, s), s)
|
||||
|
||||
typedef uint32x4_t v128;
|
||||
#define SASUB_128(i) \
|
||||
*(v128 *)(void *)(items + (i) * 4) = \
|
||||
vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
|
||||
|
||||
#else
|
||||
#else // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
#include <smmintrin.h> // sse4.1
|
||||
|
||||
typedef __m128i v128;
|
||||
typedef __m128i LzFind_v128;
|
||||
// SSE 4.1
|
||||
#define SASUB_128_V(v, s) \
|
||||
_mm_sub_epi32(_mm_max_epu32(v, s), s)
|
||||
|
||||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
|
||||
#define SASUB_128(i) \
|
||||
*(v128 *)(void *)(items + (i) * 4) = \
|
||||
_mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
|
||||
|
||||
#endif
|
||||
*( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
|
||||
*(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
|
||||
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
#ifdef ATTRIB_SSE41
|
||||
ATTRIB_SSE41
|
||||
#ifdef LZFIND_ATTRIB_SSE41
|
||||
LZFIND_ATTRIB_SSE41
|
||||
#endif
|
||||
void
|
||||
MY_FAST_CALL
|
||||
Z7_FASTCALL
|
||||
LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
||||
{
|
||||
v128 sub2 =
|
||||
const LzFind_v128 sub2 =
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
vdupq_n_u32(subValue);
|
||||
#else
|
||||
_mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
|
||||
#endif
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
do
|
||||
{
|
||||
SASUB_128(0)
|
||||
SASUB_128(1)
|
||||
SASUB_128(2)
|
||||
SASUB_128(3)
|
||||
items += 4 * 4;
|
||||
SASUB_128(0) SASUB_128(1) items += 2 * 4;
|
||||
SASUB_128(0) SASUB_128(1) items += 2 * 4;
|
||||
}
|
||||
while (items != lim);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef USE_AVX2
|
||||
#ifdef USE_LZFIND_SATUR_SUB_256
|
||||
|
||||
#include <immintrin.h> // avx
|
||||
/*
|
||||
clang :immintrin.h uses
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX2__)
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
so we need <avxintrin.h> for clang-cl */
|
||||
|
||||
#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
|
||||
#if defined(__clang__)
|
||||
#include <avxintrin.h>
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
|
||||
MY_NO_INLINE
|
||||
// AVX2:
|
||||
#define SASUB_256(i) \
|
||||
*( __m256i *)( void *)(items + (i) * 8) = \
|
||||
_mm256_sub_epi32(_mm256_max_epu32( \
|
||||
*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
|
||||
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
#ifdef ATTRIB_AVX2
|
||||
ATTRIB_AVX2
|
||||
#ifdef LZFIND_ATTRIB_AVX2
|
||||
LZFIND_ATTRIB_AVX2
|
||||
#endif
|
||||
void
|
||||
MY_FAST_CALL
|
||||
Z7_FASTCALL
|
||||
LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
||||
{
|
||||
__m256i sub2 = _mm256_set_epi32(
|
||||
const __m256i sub2 = _mm256_set_epi32(
|
||||
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
|
||||
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
do
|
||||
{
|
||||
SASUB_256(0)
|
||||
SASUB_256(1)
|
||||
items += 2 * 8;
|
||||
SASUB_256(0) SASUB_256(1) items += 2 * 8;
|
||||
SASUB_256(0) SASUB_256(1) items += 2 * 8;
|
||||
}
|
||||
while (items != lim);
|
||||
}
|
||||
#endif // USE_AVX2
|
||||
#endif // USE_LZFIND_SATUR_SUB_256
|
||||
|
||||
#ifndef FORCE_SATUR_SUB_128
|
||||
typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
|
||||
#ifndef FORCE_LZFIND_SATUR_SUB_128
|
||||
typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
|
||||
UInt32 subValue, CLzRef *items, const CLzRef *lim);
|
||||
static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
|
||||
#endif // FORCE_SATUR_SUB_128
|
||||
#endif // FORCE_LZFIND_SATUR_SUB_128
|
||||
|
||||
#endif // USE_SATUR_SUB_128
|
||||
#endif // USE_LZFIND_SATUR_SUB_128
|
||||
|
||||
|
||||
// kEmptyHashValue must be zero
|
||||
// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m;
|
||||
#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue;
|
||||
// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
|
||||
#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
|
||||
|
||||
#ifdef FORCE_SATUR_SUB_128
|
||||
#ifdef FORCE_LZFIND_SATUR_SUB_128
|
||||
|
||||
#define DEFAULT_SaturSub LzFind_SaturSub_128
|
||||
|
||||
|
@ -672,24 +764,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
|
|||
|
||||
#define DEFAULT_SaturSub LzFind_SaturSub_32
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
void
|
||||
MY_FAST_CALL
|
||||
Z7_FASTCALL
|
||||
LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
||||
{
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
do
|
||||
{
|
||||
UInt32 v;
|
||||
SASUB_32(0)
|
||||
SASUB_32(1)
|
||||
SASUB_32(2)
|
||||
SASUB_32(3)
|
||||
SASUB_32(4)
|
||||
SASUB_32(5)
|
||||
SASUB_32(6)
|
||||
SASUB_32(7)
|
||||
items += 8;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
SASUB_32(0) SASUB_32(1) items += 2;
|
||||
}
|
||||
while (items != lim);
|
||||
}
|
||||
|
@ -697,27 +784,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
|
|||
#endif
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
|
||||
{
|
||||
#define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
|
||||
|
||||
CLzRef *lim;
|
||||
|
||||
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
|
||||
#define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
|
||||
{
|
||||
UInt32 v;
|
||||
SASUB_32(0);
|
||||
SASUB_32(0)
|
||||
items++;
|
||||
}
|
||||
|
||||
{
|
||||
#define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
|
||||
lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);
|
||||
numItems &= K_NORM_ALIGN_MASK;
|
||||
const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
|
||||
CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
|
||||
numItems &= k_Align_Mask;
|
||||
if (items != lim)
|
||||
{
|
||||
#if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
|
||||
#if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
|
||||
if (g_LzFind_SaturSub)
|
||||
g_LzFind_SaturSub(subValue, items, lim);
|
||||
else
|
||||
|
@ -726,12 +809,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
|
|||
}
|
||||
items = lim;
|
||||
}
|
||||
|
||||
|
||||
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
for (; numItems != 0; numItems--)
|
||||
{
|
||||
UInt32 v;
|
||||
SASUB_32(0);
|
||||
SASUB_32(0)
|
||||
items++;
|
||||
}
|
||||
}
|
||||
|
@ -740,7 +821,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
|
|||
|
||||
// call MatchFinder_CheckLimits() only after (p->pos++) update
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void MatchFinder_CheckLimits(CMatchFinder *p)
|
||||
{
|
||||
if (// !p->streamEndWasReached && p->result == SZ_OK &&
|
||||
|
@ -768,11 +849,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
|
|||
const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
|
||||
// const UInt32 subValue = (1 << 15); // for debug
|
||||
// printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
|
||||
size_t numSonRefs = p->cyclicBufferSize;
|
||||
if (p->btMode)
|
||||
numSonRefs <<= 1;
|
||||
Inline_MatchFinder_ReduceOffsets(p, subValue);
|
||||
MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs);
|
||||
MatchFinder_REDUCE_OFFSETS(p, subValue)
|
||||
MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
|
||||
{
|
||||
size_t numSonRefs = p->cyclicBufferSize;
|
||||
if (p->btMode)
|
||||
numSonRefs <<= 1;
|
||||
MatchFinder_Normalize3(subValue, p->son, numSonRefs);
|
||||
}
|
||||
}
|
||||
|
||||
if (p->cyclicBufferPos == p->cyclicBufferSize)
|
||||
|
@ -785,7 +869,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
|
|||
/*
|
||||
(lenLimit > maxLen)
|
||||
*/
|
||||
MY_FORCE_INLINE
|
||||
Z7_FORCE_INLINE
|
||||
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
|
||||
UInt32 *d, unsigned maxLen)
|
||||
|
@ -867,7 +951,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
|
|||
}
|
||||
|
||||
|
||||
MY_FORCE_INLINE
|
||||
Z7_FORCE_INLINE
|
||||
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
|
||||
UInt32 *d, UInt32 maxLen)
|
||||
|
@ -1004,7 +1088,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
|
|||
|
||||
#define MOVE_POS_RET MOVE_POS return distances;
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void MatchFinder_MovePos(CMatchFinder *p)
|
||||
{
|
||||
/* we go here at the end of stream data, when (avail < num_hash_bytes)
|
||||
|
@ -1015,11 +1099,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
|
|||
if (p->btMode)
|
||||
p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
|
||||
*/
|
||||
MOVE_POS;
|
||||
MOVE_POS
|
||||
}
|
||||
|
||||
#define GET_MATCHES_HEADER2(minLen, ret_op) \
|
||||
unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \
|
||||
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
|
||||
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
|
||||
cur = p->buffer;
|
||||
|
||||
|
@ -1028,11 +1112,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
|
|||
|
||||
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
|
||||
|
||||
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);
|
||||
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
|
||||
|
||||
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
|
||||
distances = func(MF_PARAMS(p), \
|
||||
distances, (UInt32)_maxLen_); MOVE_POS_RET;
|
||||
distances, (UInt32)_maxLen_); MOVE_POS_RET
|
||||
|
||||
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
|
||||
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
|
||||
|
@ -1052,7 +1136,7 @@ static void MatchFinder_MovePos(CMatchFinder *p)
|
|||
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
GET_MATCHES_HEADER(2)
|
||||
HASH2_CALC;
|
||||
HASH2_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
GET_MATCHES_FOOTER_BT(1)
|
||||
|
@ -1061,7 +1145,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
GET_MATCHES_HEADER(3)
|
||||
HASH_ZIP_CALC;
|
||||
HASH_ZIP_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
GET_MATCHES_FOOTER_BT(2)
|
||||
|
@ -1082,7 +1166,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(3)
|
||||
|
||||
HASH3_CALC;
|
||||
HASH3_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
@ -1107,7 +1191,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
if (maxLen == lenLimit)
|
||||
{
|
||||
SkipMatchesSpec(MF_PARAMS(p));
|
||||
MOVE_POS_RET;
|
||||
MOVE_POS_RET
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1123,7 +1207,7 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(4)
|
||||
|
||||
HASH4_CALC;
|
||||
HASH4_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
@ -1190,7 +1274,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(5)
|
||||
|
||||
HASH5_CALC;
|
||||
HASH5_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
@ -1246,7 +1330,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
if (maxLen == lenLimit)
|
||||
{
|
||||
SkipMatchesSpec(MF_PARAMS(p));
|
||||
MOVE_POS_RET;
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1263,7 +1347,7 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(4)
|
||||
|
||||
HASH4_CALC;
|
||||
HASH4_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
@ -1314,12 +1398,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
if (maxLen == lenLimit)
|
||||
{
|
||||
p->son[p->cyclicBufferPos] = curMatch;
|
||||
MOVE_POS_RET;
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
GET_MATCHES_FOOTER_HC(maxLen);
|
||||
GET_MATCHES_FOOTER_HC(maxLen)
|
||||
}
|
||||
|
||||
|
||||
|
@ -1330,7 +1414,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
UInt32 *hash;
|
||||
GET_MATCHES_HEADER(5)
|
||||
|
||||
HASH5_CALC;
|
||||
HASH5_CALC
|
||||
|
||||
hash = p->hash;
|
||||
pos = p->pos;
|
||||
|
@ -1386,19 +1470,19 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
|||
if (maxLen == lenLimit)
|
||||
{
|
||||
p->son[p->cyclicBufferPos] = curMatch;
|
||||
MOVE_POS_RET;
|
||||
MOVE_POS_RET
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
GET_MATCHES_FOOTER_HC(maxLen);
|
||||
GET_MATCHES_FOOTER_HC(maxLen)
|
||||
}
|
||||
|
||||
|
||||
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
|
||||
{
|
||||
GET_MATCHES_HEADER(3)
|
||||
HASH_ZIP_CALC;
|
||||
HASH_ZIP_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
GET_MATCHES_FOOTER_HC(2)
|
||||
|
@ -1409,7 +1493,7 @@ static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
{
|
||||
SKIP_HEADER(2)
|
||||
{
|
||||
HASH2_CALC;
|
||||
HASH2_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
}
|
||||
|
@ -1420,7 +1504,7 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
{
|
||||
SKIP_HEADER(3)
|
||||
{
|
||||
HASH_ZIP_CALC;
|
||||
HASH_ZIP_CALC
|
||||
curMatch = p->hash[hv];
|
||||
p->hash[hv] = p->pos;
|
||||
}
|
||||
|
@ -1433,7 +1517,7 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
{
|
||||
UInt32 h2;
|
||||
UInt32 *hash;
|
||||
HASH3_CALC;
|
||||
HASH3_CALC
|
||||
hash = p->hash;
|
||||
curMatch = (hash + kFix3HashSize)[hv];
|
||||
hash[h2] =
|
||||
|
@ -1448,7 +1532,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
{
|
||||
UInt32 h2, h3;
|
||||
UInt32 *hash;
|
||||
HASH4_CALC;
|
||||
HASH4_CALC
|
||||
hash = p->hash;
|
||||
curMatch = (hash + kFix4HashSize)[hv];
|
||||
hash [h2] =
|
||||
|
@ -1464,7 +1548,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
{
|
||||
UInt32 h2, h3;
|
||||
UInt32 *hash;
|
||||
HASH5_CALC;
|
||||
HASH5_CALC
|
||||
hash = p->hash;
|
||||
curMatch = (hash + kFix5HashSize)[hv];
|
||||
hash [h2] =
|
||||
|
@ -1478,7 +1562,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
|
||||
#define HC_SKIP_HEADER(minLen) \
|
||||
do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
|
||||
Byte *cur; \
|
||||
const Byte *cur; \
|
||||
UInt32 *hash; \
|
||||
UInt32 *son; \
|
||||
UInt32 pos = p->pos; \
|
||||
|
@ -1510,7 +1594,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
HC_SKIP_HEADER(4)
|
||||
|
||||
UInt32 h2, h3;
|
||||
HASH4_CALC;
|
||||
HASH4_CALC
|
||||
curMatch = (hash + kFix4HashSize)[hv];
|
||||
hash [h2] =
|
||||
(hash + kFix3HashSize)[h3] =
|
||||
|
@ -1540,7 +1624,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
|
|||
{
|
||||
HC_SKIP_HEADER(3)
|
||||
|
||||
HASH_ZIP_CALC;
|
||||
HASH_ZIP_CALC
|
||||
curMatch = hash[hv];
|
||||
hash[hv] = pos;
|
||||
|
||||
|
@ -1590,17 +1674,17 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
|
|||
|
||||
|
||||
|
||||
void LzFindPrepare()
|
||||
void LzFindPrepare(void)
|
||||
{
|
||||
#ifndef FORCE_SATUR_SUB_128
|
||||
#ifdef USE_SATUR_SUB_128
|
||||
#ifndef FORCE_LZFIND_SATUR_SUB_128
|
||||
#ifdef USE_LZFIND_SATUR_SUB_128
|
||||
LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
|
||||
#ifdef MY_CPU_ARM_OR_ARM64
|
||||
{
|
||||
if (CPU_IsSupported_NEON())
|
||||
{
|
||||
// #pragma message ("=== LzFind NEON")
|
||||
_PRF(printf("\n=== LzFind NEON\n"));
|
||||
PRF(printf("\n=== LzFind NEON\n"));
|
||||
f = LzFind_SaturSub_128;
|
||||
}
|
||||
// f = 0; // for debug
|
||||
|
@ -1609,20 +1693,25 @@ void LzFindPrepare()
|
|||
if (CPU_IsSupported_SSE41())
|
||||
{
|
||||
// #pragma message ("=== LzFind SSE41")
|
||||
_PRF(printf("\n=== LzFind SSE41\n"));
|
||||
PRF(printf("\n=== LzFind SSE41\n"));
|
||||
f = LzFind_SaturSub_128;
|
||||
|
||||
#ifdef USE_AVX2
|
||||
#ifdef USE_LZFIND_SATUR_SUB_256
|
||||
if (CPU_IsSupported_AVX2())
|
||||
{
|
||||
// #pragma message ("=== LzFind AVX2")
|
||||
_PRF(printf("\n=== LzFind AVX2\n"));
|
||||
PRF(printf("\n=== LzFind AVX2\n"));
|
||||
f = LzFind_SaturSub_256;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
g_LzFind_SaturSub = f;
|
||||
#endif // USE_SATUR_SUB_128
|
||||
#endif // FORCE_SATUR_SUB_128
|
||||
#endif // USE_LZFIND_SATUR_SUB_128
|
||||
#endif // FORCE_LZFIND_SATUR_SUB_128
|
||||
}
|
||||
|
||||
|
||||
#undef MOVE_POS
|
||||
#undef MOVE_POS_RET
|
||||
#undef PRF
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue