mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-21 06:45:39 -06:00
plugin/zstd: updated to 1.5.6, GridCtrl: Fixed unitialised variable in GridClipboard
This commit is contained in:
parent
85c30ddff5
commit
3513dc4d47
66 changed files with 24977 additions and 7880 deletions
|
|
@ -271,9 +271,11 @@ files
|
|||
lz4.c
|
||||
lz4.h
|
||||
|
||||
put to Core/lib
|
||||
put to Core/lib (rename the LICENSE)
|
||||
put to lz4/
|
||||
|
||||
(in lz4 sources are actually inactive)
|
||||
|
||||
initial: 1.7.3
|
||||
2019-03-22: 1.8.3
|
||||
2020-09-03: 1.9.2
|
||||
|
|
|
|||
|
|
@ -276,7 +276,7 @@ class GridClipboard : Moveable<GridClipboard>
|
|||
|
||||
Vector<ClipboardData> data;
|
||||
Point minpos, maxpos;
|
||||
bool shiftmode;
|
||||
bool shiftmode = false;
|
||||
|
||||
virtual void Serialize(Stream &s)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,28 +1,30 @@
|
|||
zstd - standard compression library
|
||||
Copyright (C) 2014-2016, Yann Collet.
|
||||
BSD License
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
For Zstandard software
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
|
||||
|
||||
You can contact the author at :
|
||||
- zstd source repository : https://github.com/Cyan4973/zstd
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name Facebook, nor Meta, nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
|||
55
uppsrc/plugin/zstd/lib/allocations.h
Normal file
55
uppsrc/plugin/zstd/lib/allocations.h
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* This file provides custom allocation primitives
|
||||
*/
|
||||
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
|
||||
|
||||
#include "compiler.h" /* MEM_STATIC */
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h" /* ZSTD_customMem */
|
||||
|
||||
#ifndef ZSTD_ALLOCATIONS_H
|
||||
#define ZSTD_ALLOCATIONS_H
|
||||
|
||||
/* custom memory allocation functions */
|
||||
|
||||
MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc)
|
||||
return customMem.customAlloc(customMem.opaque, size);
|
||||
return ZSTD_malloc(size);
|
||||
}
|
||||
|
||||
MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc) {
|
||||
/* calloc implemented as malloc+memset;
|
||||
* not as efficient as calloc, but next best guess for custom malloc */
|
||||
void* const ptr = customMem.customAlloc(customMem.opaque, size);
|
||||
ZSTD_memset(ptr, 0, size);
|
||||
return ptr;
|
||||
}
|
||||
return ZSTD_calloc(1, size);
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
|
||||
{
|
||||
if (ptr!=NULL) {
|
||||
if (customMem.customFree)
|
||||
customMem.customFree(customMem.opaque, ptr);
|
||||
else
|
||||
ZSTD_free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ZSTD_ALLOCATIONS_H */
|
||||
200
uppsrc/plugin/zstd/lib/bits.h
Normal file
200
uppsrc/plugin/zstd/lib/bits.h
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_BITS_H
|
||||
#define ZSTD_BITS_H
|
||||
|
||||
#include "mem.h"
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
|
||||
30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7,
|
||||
26, 12, 18, 6, 11, 5, 10, 9};
|
||||
return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_tzcnt_u32(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanForward(&r, val);
|
||||
return (unsigned)r;
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (unsigned)__builtin_ctz(val);
|
||||
# else
|
||||
return ZSTD_countTrailingZeros32_fallback(val);
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
|
||||
assert(val != 0);
|
||||
{
|
||||
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
|
||||
11, 14, 16, 18, 22, 25, 3, 30,
|
||||
8, 12, 20, 28, 15, 17, 24, 7,
|
||||
19, 27, 23, 6, 26, 5, 4, 31};
|
||||
val |= val >> 1;
|
||||
val |= val >> 2;
|
||||
val |= val >> 4;
|
||||
val |= val >> 8;
|
||||
val |= val >> 16;
|
||||
return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_lzcnt_u32(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanReverse(&r, val);
|
||||
return (unsigned)(31 - r);
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (unsigned)__builtin_clz(val);
|
||||
# else
|
||||
return ZSTD_countLeadingZeros32_fallback(val);
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_tzcnt_u64(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanForward64(&r, val);
|
||||
return (unsigned)r;
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
|
||||
return (unsigned)__builtin_ctzll(val);
|
||||
# else
|
||||
{
|
||||
U32 mostSignificantWord = (U32)(val >> 32);
|
||||
U32 leastSignificantWord = (U32)val;
|
||||
if (leastSignificantWord == 0) {
|
||||
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
|
||||
} else {
|
||||
return ZSTD_countTrailingZeros32(leastSignificantWord);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
# if STATIC_BMI2 == 1
|
||||
return (unsigned)_lzcnt_u64(val);
|
||||
# else
|
||||
if (val != 0) {
|
||||
unsigned long r;
|
||||
_BitScanReverse64(&r, val);
|
||||
return (unsigned)(63 - r);
|
||||
} else {
|
||||
/* Should not reach this code path */
|
||||
__assume(0);
|
||||
}
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
return (unsigned)(__builtin_clzll(val));
|
||||
# else
|
||||
{
|
||||
U32 mostSignificantWord = (U32)(val >> 32);
|
||||
U32 leastSignificantWord = (U32)val;
|
||||
if (mostSignificantWord == 0) {
|
||||
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
|
||||
} else {
|
||||
return ZSTD_countLeadingZeros32(mostSignificantWord);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
|
||||
{
|
||||
if (MEM_isLittleEndian()) {
|
||||
if (MEM_64bits()) {
|
||||
return ZSTD_countTrailingZeros64((U64)val) >> 3;
|
||||
} else {
|
||||
return ZSTD_countTrailingZeros32((U32)val) >> 3;
|
||||
}
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
return ZSTD_countLeadingZeros64((U64)val) >> 3;
|
||||
} else {
|
||||
return ZSTD_countLeadingZeros32((U32)val) >> 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
|
||||
{
|
||||
assert(val != 0);
|
||||
return 31 - ZSTD_countLeadingZeros32(val);
|
||||
}
|
||||
|
||||
/* ZSTD_rotateRight_*():
|
||||
* Rotates a bitfield to the right by "count" bits.
|
||||
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
|
||||
*/
|
||||
MEM_STATIC
|
||||
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
|
||||
assert(count < 64);
|
||||
count &= 0x3F; /* for fickle pattern recognition */
|
||||
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
|
||||
}
|
||||
|
||||
MEM_STATIC
|
||||
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
|
||||
assert(count < 32);
|
||||
count &= 0x1F; /* for fickle pattern recognition */
|
||||
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
|
||||
}
|
||||
|
||||
MEM_STATIC
|
||||
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
|
||||
assert(count < 16);
|
||||
count &= 0x0F; /* for fickle pattern recognition */
|
||||
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
|
||||
}
|
||||
|
||||
#endif /* ZSTD_BITS_H */
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* bitstream
|
||||
* Part of FSE library
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -17,7 +17,6 @@
|
|||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This API consists of small unitary functions, which must be inlined for best performance.
|
||||
* Since link-time-optimization is not available for all compilers,
|
||||
|
|
@ -31,15 +30,18 @@ extern "C" {
|
|||
#include "compiler.h" /* UNLIKELY() */
|
||||
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
||||
#include "error_private.h" /* error codes and messages */
|
||||
#include "bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
|
||||
/*=========================================
|
||||
* Target specific
|
||||
=========================================*/
|
||||
#if defined(__BMI__) && defined(__GNUC__)
|
||||
# include <immintrin.h> /* support for bextr (experimental) */
|
||||
#elif defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
#ifndef ZSTD_NO_INTRINSICS
|
||||
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
|
||||
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
|
||||
# elif defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define STREAM_ACCUMULATOR_MIN_32 25
|
||||
|
|
@ -88,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|||
/*-********************************************
|
||||
* bitStream decoding API (read backward)
|
||||
**********************************************/
|
||||
typedef size_t BitContainerType;
|
||||
typedef struct {
|
||||
size_t bitContainer;
|
||||
BitContainerType bitContainer;
|
||||
unsigned bitsConsumed;
|
||||
const char* ptr;
|
||||
const char* start;
|
||||
const char* limitPtr;
|
||||
} BIT_DStream_t;
|
||||
|
||||
typedef enum { BIT_DStream_unfinished = 0,
|
||||
BIT_DStream_endOfBuffer = 1,
|
||||
BIT_DStream_completed = 2,
|
||||
BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
|
||||
/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
|
||||
typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
|
||||
BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
|
||||
BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
|
||||
BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
|
||||
} BIT_DStream_status; /* result of BIT_reloadDStream() */
|
||||
|
||||
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
|
||||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
|
||||
|
|
@ -110,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
|
|||
|
||||
/* Start by invoking BIT_initDStream().
|
||||
* A chunk of the bitStream is then stored into a local register.
|
||||
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
|
||||
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
|
||||
* You can then retrieve bitFields stored into the local register, **in reverse order**.
|
||||
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
|
||||
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
|
||||
|
|
@ -131,38 +134,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
|
|||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
|
||||
/* faster, but works only if nbBits >= 1 */
|
||||
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Internal functions
|
||||
****************************************************************/
|
||||
MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
||||
return __builtin_clz (val) ^ 31;
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return 31 - __CLZ(val);
|
||||
# else /* Software version */
|
||||
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
|
||||
11, 14, 16, 18, 22, 25, 3, 30,
|
||||
8, 12, 20, 28, 15, 17, 24, 7,
|
||||
19, 27, 23, 6, 26, 5, 4, 31 };
|
||||
U32 v = val;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
|
||||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
/*===== Local Constants =====*/
|
||||
static const unsigned BIT_mask[] = {
|
||||
0, 1, 3, 7, 0xF, 0x1F,
|
||||
|
|
@ -192,16 +163,26 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
|||
return 0;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
{
|
||||
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
|
||||
return _bzhi_u64(bitContainer, nbBits);
|
||||
#else
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
return bitContainer & BIT_mask[nbBits];
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! BIT_addBits() :
|
||||
* can add up to 31 bits into `bitC`.
|
||||
* Note : does not check for register overflow ! */
|
||||
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
||||
size_t value, unsigned nbBits)
|
||||
{
|
||||
MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
||||
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
||||
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
|
||||
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
|
||||
bitC->bitPos += nbBits;
|
||||
}
|
||||
|
||||
|
|
@ -271,7 +252,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
|
|||
*/
|
||||
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
|
||||
{
|
||||
if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
|
||||
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
|
||||
|
||||
bitD->start = (const char*)srcBuffer;
|
||||
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
|
||||
|
|
@ -280,35 +261,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|||
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
|
||||
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
||||
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
||||
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
|
||||
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
|
||||
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
|
||||
} else {
|
||||
bitD->ptr = bitD->start;
|
||||
bitD->bitContainer = *(const BYTE*)(bitD->start);
|
||||
switch(srcSize)
|
||||
{
|
||||
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
|
||||
/* fall-through */
|
||||
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
|
||||
/* fall-through */
|
||||
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
|
||||
/* fall-through */
|
||||
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
|
||||
/* fall-through */
|
||||
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
|
||||
/* fall-through */
|
||||
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
|
||||
/* fall-through */
|
||||
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
|
||||
ZSTD_FALLTHROUGH;
|
||||
|
||||
default: break;
|
||||
}
|
||||
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
||||
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
|
||||
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
||||
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
|
||||
}
|
||||
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
|
||||
|
|
@ -317,23 +298,26 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|||
return srcSize;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
||||
FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
|
||||
{
|
||||
return bitContainer >> start;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
||||
FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
|
||||
{
|
||||
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
||||
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
/* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
|
||||
* than accessing memory. When bmi2 instruction is not present, we consider
|
||||
* such cpus old (pre-Haswell, 2013) and their performance is not of that
|
||||
* importance.
|
||||
*/
|
||||
#if defined(__x86_64__) || defined(_M_X86)
|
||||
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
|
||||
#else
|
||||
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||
{
|
||||
assert(nbBits < BIT_MASK_SIZE);
|
||||
return bitContainer & BIT_mask[nbBits];
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! BIT_lookBits() :
|
||||
|
|
@ -342,7 +326,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|||
* On 32-bits, maxNbBits==24.
|
||||
* On 64-bits, maxNbBits==56.
|
||||
* @return : value extracted */
|
||||
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
{
|
||||
/* arbitrate between double-shift and shift+mask */
|
||||
#if 1
|
||||
|
|
@ -365,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
|||
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
||||
}
|
||||
|
||||
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
{
|
||||
bitD->bitsConsumed += nbBits;
|
||||
}
|
||||
|
|
@ -374,7 +358,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|||
* Read (consume) next n bits from local register and update.
|
||||
* Pay attention to not read more than nbBits contained into local register.
|
||||
* @return : extracted value. */
|
||||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t const value = BIT_lookBits(bitD, nbBits);
|
||||
BIT_skipBits(bitD, nbBits);
|
||||
|
|
@ -382,7 +366,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
|||
}
|
||||
|
||||
/*! BIT_readBitsFast() :
|
||||
* unsafe version; only works only if nbBits >= 1 */
|
||||
* unsafe version; only works if nbBits >= 1 */
|
||||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
||||
|
|
@ -391,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
|||
return value;
|
||||
}
|
||||
|
||||
/*! BIT_reloadDStream_internal() :
|
||||
* Simple variant of BIT_reloadDStream(), with two conditions:
|
||||
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
|
||||
* 2. look window is valid after shifted down : bitD->ptr >= bitD->start
|
||||
*/
|
||||
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
|
||||
{
|
||||
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
|
||||
bitD->ptr -= bitD->bitsConsumed >> 3;
|
||||
assert(bitD->ptr >= bitD->start);
|
||||
bitD->bitsConsumed &= 7;
|
||||
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
||||
return BIT_DStream_unfinished;
|
||||
}
|
||||
|
||||
/*! BIT_reloadDStreamFast() :
|
||||
* Similar to BIT_reloadDStream(), but with two differences:
|
||||
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
|
||||
|
|
@ -401,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
|
|||
{
|
||||
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
|
||||
return BIT_DStream_overflow;
|
||||
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
|
||||
bitD->ptr -= bitD->bitsConsumed >> 3;
|
||||
bitD->bitsConsumed &= 7;
|
||||
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
||||
return BIT_DStream_unfinished;
|
||||
return BIT_reloadDStream_internal(bitD);
|
||||
}
|
||||
|
||||
/*! BIT_reloadDStream() :
|
||||
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
||||
* This function is safe, it guarantees it will not read beyond src buffer.
|
||||
* This function is safe, it guarantees it will not never beyond src buffer.
|
||||
* @return : status of `BIT_DStream_t` internal register.
|
||||
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
|
||||
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
||||
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
||||
{
|
||||
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
|
||||
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
|
||||
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
|
||||
static const BitContainerType zeroFilled = 0;
|
||||
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
|
||||
/* overflow detected, erroneous scenario or end of stream: no update */
|
||||
return BIT_DStream_overflow;
|
||||
}
|
||||
|
||||
assert(bitD->ptr >= bitD->start);
|
||||
|
||||
if (bitD->ptr >= bitD->limitPtr) {
|
||||
return BIT_reloadDStreamFast(bitD);
|
||||
return BIT_reloadDStream_internal(bitD);
|
||||
}
|
||||
if (bitD->ptr == bitD->start) {
|
||||
/* reached end of bitStream => no update */
|
||||
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
|
||||
return BIT_DStream_completed;
|
||||
}
|
||||
/* start < ptr < limitPtr */
|
||||
/* start < ptr < limitPtr => cautious update */
|
||||
{ U32 nbBytes = bitD->bitsConsumed >> 3;
|
||||
BIT_DStream_status result = BIT_DStream_unfinished;
|
||||
if (bitD->ptr - nbBytes < bitD->start) {
|
||||
|
|
|
|||
134
uppsrc/plugin/zstd/lib/clevels.h
Normal file
134
uppsrc/plugin/zstd/lib/clevels.h
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_CLEVELS_H
|
||||
#define ZSTD_CLEVELS_H
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
|
||||
#include "zstd.h"
|
||||
|
||||
/*-===== Pre-defined compression levels =====-*/
|
||||
|
||||
#define ZSTD_MAX_CLEVEL 22
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__unused__))
|
||||
#endif
|
||||
|
||||
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
|
||||
{ /* "default" - for any srcSize > 256 KB */
|
||||
/* W, C, H, S, L, TL, strat */
|
||||
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
|
||||
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
|
||||
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
|
||||
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
|
||||
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
|
||||
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
|
||||
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
|
||||
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
|
||||
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
|
||||
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
|
||||
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
|
||||
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
|
||||
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
|
||||
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
|
||||
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
|
||||
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
|
||||
},
|
||||
{ /* for srcSize <= 256 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
|
||||
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
|
||||
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
|
||||
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
|
||||
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
|
||||
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
|
||||
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
|
||||
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
|
||||
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
||||
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
|
||||
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
||||
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
||||
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
|
||||
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
{ /* for srcSize <= 128 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
|
||||
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
|
||||
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
|
||||
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
||||
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
|
||||
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
|
||||
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
|
||||
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
||||
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
|
||||
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
||||
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
||||
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
|
||||
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
{ /* for srcSize <= 16 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
|
||||
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
|
||||
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
|
||||
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
||||
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
|
||||
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
|
||||
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
|
||||
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
|
||||
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
|
||||
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
|
||||
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
|
||||
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
|
||||
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
|
||||
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
|
||||
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
|
||||
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif /* ZSTD_CLEVELS_H */
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -11,6 +11,10 @@
|
|||
#ifndef ZSTD_COMPILER_H
|
||||
#define ZSTD_COMPILER_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "portability_macros.h"
|
||||
|
||||
/*-*******************************************************
|
||||
* Compiler specifics
|
||||
*********************************************************/
|
||||
|
|
@ -38,12 +42,30 @@
|
|||
|
||||
#endif
|
||||
|
||||
/**
|
||||
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
|
||||
This explicitly marks such functions as __cdecl so that the code will still compile
|
||||
if a CC other than __cdecl has been made the default.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# define WIN_CDECL __cdecl
|
||||
#else
|
||||
# define WIN_CDECL
|
||||
#endif
|
||||
|
||||
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
|
||||
#if defined(__GNUC__)
|
||||
# define UNUSED_ATTR __attribute__((unused))
|
||||
#else
|
||||
# define UNUSED_ATTR
|
||||
#endif
|
||||
|
||||
/**
|
||||
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
||||
* parameters. They must be inlined for the compiler to eliminate the constant
|
||||
* branches.
|
||||
*/
|
||||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
||||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
|
||||
/**
|
||||
* HINT_INLINE is used to help the compiler generate better code. It is *not*
|
||||
* used for "templates", so it can be tweaked based on the compilers
|
||||
|
|
@ -58,14 +80,28 @@
|
|||
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
|
||||
# define HINT_INLINE static INLINE_KEYWORD
|
||||
#else
|
||||
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
||||
# define HINT_INLINE FORCE_INLINE_TEMPLATE
|
||||
#endif
|
||||
|
||||
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
|
||||
/* "soft" inline :
|
||||
* The compiler is free to select if it's a good idea to inline or not.
|
||||
* The main objective is to silence compiler warnings
|
||||
* when a defined function in included but not used.
|
||||
*
|
||||
* Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
|
||||
* Updating the prefix is probably preferable, but requires a fairly large codemod,
|
||||
* since this name is used everywhere.
|
||||
*/
|
||||
#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
|
||||
#if defined(__GNUC__)
|
||||
# define UNUSED_ATTR __attribute__((unused))
|
||||
# define MEM_STATIC static __inline UNUSED_ATTR
|
||||
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
# define MEM_STATIC static inline
|
||||
#elif defined(_MSC_VER)
|
||||
# define MEM_STATIC static __inline
|
||||
#else
|
||||
# define UNUSED_ATTR
|
||||
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* force no inlining */
|
||||
|
|
@ -79,67 +115,58 @@
|
|||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* target attribute */
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
|
||||
#endif
|
||||
#if defined(__GNUC__) || defined(__ICCARM__)
|
||||
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
|
||||
#else
|
||||
# define TARGET_ATTRIBUTE(target)
|
||||
#endif
|
||||
|
||||
/* Enable runtime BMI2 dispatch based on the CPU.
|
||||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
||||
/* Target attribute for BMI2 dynamic dispatch.
|
||||
* Enable lzcnt, bmi, and bmi2.
|
||||
* We test for bmi1 & bmi2. lzcnt is included in bmi1.
|
||||
*/
|
||||
#ifndef DYNAMIC_BMI2
|
||||
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
||||
|| (defined(__GNUC__) \
|
||||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
||||
&& (defined(__x86_64__) || defined(_M_X86)) \
|
||||
&& !defined(__BMI2__)
|
||||
# define DYNAMIC_BMI2 1
|
||||
#else
|
||||
# define DYNAMIC_BMI2 0
|
||||
#endif
|
||||
#endif
|
||||
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
|
||||
|
||||
/* prefetch
|
||||
* can be disabled, by declaring NO_PREFETCH build macro */
|
||||
#if defined(NO_PREFETCH)
|
||||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
|
||||
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
|
||||
#else
|
||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
|
||||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
||||
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
||||
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
||||
# elif defined(__aarch64__)
|
||||
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
|
||||
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
|
||||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
||||
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
||||
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
||||
# elif defined(__aarch64__)
|
||||
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
|
||||
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
|
||||
# else
|
||||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
|
||||
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
|
||||
# endif
|
||||
#endif /* NO_PREFETCH */
|
||||
|
||||
#define CACHELINE_SIZE 64
|
||||
|
||||
#define PREFETCH_AREA(p, s) { \
|
||||
const char* const _ptr = (const char*)(p); \
|
||||
size_t const _size = (size_t)(s); \
|
||||
size_t _pos; \
|
||||
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
||||
PREFETCH_L2(_ptr + _pos); \
|
||||
} \
|
||||
}
|
||||
#define PREFETCH_AREA(p, s) \
|
||||
do { \
|
||||
const char* const _ptr = (const char*)(p); \
|
||||
size_t const _size = (size_t)(s); \
|
||||
size_t _pos; \
|
||||
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
||||
PREFETCH_L2(_ptr + _pos); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* vectorization
|
||||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
|
||||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
|
||||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
|
||||
* and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
|
||||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
|
||||
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
|
||||
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
||||
# else
|
||||
|
|
@ -162,6 +189,12 @@
|
|||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
|
||||
# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
|
||||
#else
|
||||
# define ZSTD_UNREACHABLE do { assert(0); } while (0)
|
||||
#endif
|
||||
|
||||
/* disable warnings */
|
||||
#ifdef _MSC_VER /* Visual Studio */
|
||||
# include <intrin.h> /* For Visual 2005 */
|
||||
|
|
@ -172,4 +205,246 @@
|
|||
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
|
||||
#endif
|
||||
|
||||
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
|
||||
#ifndef STATIC_BMI2
|
||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
|
||||
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
|
||||
# define STATIC_BMI2 1
|
||||
# endif
|
||||
# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
|
||||
# define STATIC_BMI2 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef STATIC_BMI2
|
||||
#define STATIC_BMI2 0
|
||||
#endif
|
||||
|
||||
/* compile time determination of SIMD support */
|
||||
#if !defined(ZSTD_NO_INTRINSICS)
|
||||
# if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
|
||||
# define ZSTD_ARCH_X86_SSE2
|
||||
# endif
|
||||
# if defined(__ARM_NEON) || defined(_M_ARM64)
|
||||
# define ZSTD_ARCH_ARM_NEON
|
||||
# endif
|
||||
#
|
||||
# if defined(ZSTD_ARCH_X86_SSE2)
|
||||
# include <emmintrin.h>
|
||||
# elif defined(ZSTD_ARCH_ARM_NEON)
|
||||
# include <arm_neon.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* C-language Attributes are added in C23. */
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
|
||||
# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
|
||||
#else
|
||||
# define ZSTD_HAS_C_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
|
||||
/* Only use C++ attributes in C++. Some compilers report support for C++
|
||||
* attributes when compiling with C.
|
||||
*/
|
||||
#if defined(__cplusplus) && defined(__has_cpp_attribute)
|
||||
# define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
|
||||
#else
|
||||
# define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
|
||||
/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
|
||||
* - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
|
||||
* - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
|
||||
* - Else: __attribute__((__fallthrough__))
|
||||
*/
|
||||
#ifndef ZSTD_FALLTHROUGH
|
||||
# if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
|
||||
# define ZSTD_FALLTHROUGH [[fallthrough]]
|
||||
# elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
|
||||
# define ZSTD_FALLTHROUGH [[fallthrough]]
|
||||
# elif __has_attribute(__fallthrough__)
|
||||
/* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
|
||||
* gcc complains about: a label can only be part of a statement and a declaration is not a statement.
|
||||
*/
|
||||
# define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
|
||||
# else
|
||||
# define ZSTD_FALLTHROUGH
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*-**************************************************************
|
||||
* Alignment check
|
||||
*****************************************************************/
|
||||
|
||||
/* this test was initially positioned in mem.h,
|
||||
* but this file is removed (or replaced) for linux kernel
|
||||
* so it's now hosted in compiler.h,
|
||||
* which remains valid for both user & kernel spaces.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_ALIGNOF
|
||||
# if defined(__GNUC__) || defined(_MSC_VER)
|
||||
/* covers gcc, clang & MSVC */
|
||||
/* note : this section must come first, before C11,
|
||||
* due to a limitation in the kernel source generator */
|
||||
# define ZSTD_ALIGNOF(T) __alignof(T)
|
||||
|
||||
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
|
||||
/* C11 support */
|
||||
# include <stdalign.h>
|
||||
# define ZSTD_ALIGNOF(T) alignof(T)
|
||||
|
||||
# else
|
||||
/* No known support for alignof() - imperfect backup */
|
||||
# define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
|
||||
|
||||
# endif
|
||||
#endif /* ZSTD_ALIGNOF */
|
||||
|
||||
/*-**************************************************************
|
||||
* Sanitizer
|
||||
*****************************************************************/
|
||||
|
||||
/**
|
||||
* Zstd relies on pointer overflow in its decompressor.
|
||||
* We add this attribute to functions that rely on pointer overflow.
|
||||
*/
|
||||
#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
# if __has_attribute(no_sanitize)
|
||||
# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
|
||||
/* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
|
||||
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
|
||||
# else
|
||||
/* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
|
||||
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
|
||||
# endif
|
||||
# else
|
||||
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Helper function to perform a wrapped pointer difference without trigging
|
||||
* UBSAN.
|
||||
*
|
||||
* @returns lhs - rhs with wrapping
|
||||
*/
|
||||
MEM_STATIC
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
|
||||
{
|
||||
return lhs - rhs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to perform a wrapped pointer add without triggering UBSAN.
|
||||
*
|
||||
* @return ptr + add with wrapping
|
||||
*/
|
||||
MEM_STATIC
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
|
||||
{
|
||||
return ptr + add;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to perform a wrapped pointer subtraction without triggering
|
||||
* UBSAN.
|
||||
*
|
||||
* @return ptr - sub with wrapping
|
||||
*/
|
||||
MEM_STATIC
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
|
||||
{
|
||||
return ptr - sub;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to add to a pointer that works around C's undefined behavior
|
||||
* of adding 0 to NULL.
|
||||
*
|
||||
* @returns `ptr + add` except it defines `NULL + 0 == NULL`.
|
||||
*/
|
||||
MEM_STATIC
|
||||
unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
|
||||
{
|
||||
return add > 0 ? ptr + add : ptr;
|
||||
}
|
||||
|
||||
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
|
||||
* abundance of caution, disable our custom poisoning on mingw. */
|
||||
#ifdef __MINGW32__
|
||||
#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
|
||||
#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
|
||||
#endif
|
||||
#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
|
||||
#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
#include <stddef.h> /* size_t */
|
||||
#define ZSTD_DEPS_NEED_STDINT
|
||||
#include "zstd_deps.h" /* intptr_t */
|
||||
|
||||
/* Make memory region fully initialized (without changing its contents). */
|
||||
void __msan_unpoison(const volatile void *a, size_t size);
|
||||
|
||||
/* Make memory region fully uninitialized (without changing its contents).
|
||||
This is a legacy interface that does not update origin information. Use
|
||||
__msan_allocated_memory() instead. */
|
||||
void __msan_poison(const volatile void *a, size_t size);
|
||||
|
||||
/* Returns the offset of the first (at least partially) poisoned byte in the
|
||||
memory range, or -1 if the whole range is good. */
|
||||
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
||||
|
||||
/* Print shadow and origin for the memory range to stderr in a human-readable
|
||||
format. */
|
||||
void __msan_print_shadow(const volatile void *x, size_t size);
|
||||
#endif
|
||||
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Instrumented
|
||||
* code is forbidden from accessing addresses in this region until it is
|
||||
* unpoisoned. This function is not guaranteed to poison the entire region -
|
||||
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
|
||||
* alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can poison or
|
||||
* unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_poison_memory_region(void const volatile *addr, size_t size);
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Accessing
|
||||
* addresses in this region is allowed until this region is poisoned again.
|
||||
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
|
||||
* to ASan alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can
|
||||
* poison or unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_COMPILER_H */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -16,8 +16,6 @@
|
|||
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "mem.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
|
@ -37,6 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
|
|||
U32 f7b = 0;
|
||||
U32 f7c = 0;
|
||||
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
||||
#if !defined(__clang__)
|
||||
int reg[4];
|
||||
__cpuid((int*)reg, 0);
|
||||
{
|
||||
|
|
@ -52,6 +51,41 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
|
|||
f7c = (U32)reg[2];
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
|
||||
* which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
|
||||
* to due to being a reserved register. So in that case, do the `cpuid`
|
||||
* ourselves. Clang supports inline assembly anyway.
|
||||
*/
|
||||
U32 n;
|
||||
__asm__(
|
||||
"pushq %%rbx\n\t"
|
||||
"cpuid\n\t"
|
||||
"popq %%rbx\n\t"
|
||||
: "=a"(n)
|
||||
: "a"(0)
|
||||
: "rcx", "rdx");
|
||||
if (n >= 1) {
|
||||
U32 f1a;
|
||||
__asm__(
|
||||
"pushq %%rbx\n\t"
|
||||
"cpuid\n\t"
|
||||
"popq %%rbx\n\t"
|
||||
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
|
||||
: "a"(1)
|
||||
:);
|
||||
}
|
||||
if (n >= 7) {
|
||||
__asm__(
|
||||
"pushq %%rbx\n\t"
|
||||
"cpuid\n\t"
|
||||
"movq %%rbx, %%rax\n\t"
|
||||
"popq %%rbx"
|
||||
: "=a"(f7b), "=c"(f7c)
|
||||
: "a"(7), "c"(0)
|
||||
: "rdx");
|
||||
}
|
||||
#endif
|
||||
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
|
||||
/* The following block like the normal cpuid branch below, but gcc
|
||||
* reserves ebx for use of its pic register so we must specially
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* debug
|
||||
* Part of FSE library
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -21,4 +21,10 @@
|
|||
|
||||
#include "debug.h"
|
||||
|
||||
#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
|
||||
/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
|
||||
* translation unit is empty. So remove this from Linux kernel builds, but
|
||||
* otherwise just leave it in.
|
||||
*/
|
||||
int g_debuglevel = DEBUGLEVEL;
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* debug
|
||||
* Part of FSE library
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -51,15 +51,6 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
|
||||
/* DEBUGFILE can be defined externally,
|
||||
* typically through compiler command line.
|
||||
* note : currently useless.
|
||||
* Value must be stderr or stdout */
|
||||
#ifndef DEBUGFILE
|
||||
# define DEBUGFILE stderr
|
||||
#endif
|
||||
|
||||
|
||||
/* recommended values for DEBUGLEVEL :
|
||||
* 0 : release mode, no debug, all run-time checks disabled
|
||||
* 1 : enables assert() only, no display
|
||||
|
|
@ -76,7 +67,8 @@ extern "C" {
|
|||
*/
|
||||
|
||||
#if (DEBUGLEVEL>=1)
|
||||
# include <assert.h>
|
||||
# define ZSTD_DEPS_NEED_ASSERT
|
||||
# include "zstd_deps.h"
|
||||
#else
|
||||
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
|
||||
# define assert(condition) ((void)0) /* disable assert (default) */
|
||||
|
|
@ -84,7 +76,8 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
#if (DEBUGLEVEL>=2)
|
||||
# include <stdio.h>
|
||||
# define ZSTD_DEPS_NEED_IO
|
||||
# include "zstd_deps.h"
|
||||
extern int g_debuglevel; /* the variable is only declared,
|
||||
it actually lives in debug.c,
|
||||
and is shared by the whole process.
|
||||
|
|
@ -92,18 +85,27 @@ extern int g_debuglevel; /* the variable is only declared,
|
|||
It's useful when enabling very verbose levels
|
||||
on selective conditions (such as position in src) */
|
||||
|
||||
# define RAWLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
} }
|
||||
# define DEBUGLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
|
||||
fprintf(stderr, " \n"); \
|
||||
} }
|
||||
# define RAWLOG(l, ...) \
|
||||
do { \
|
||||
if (l<=g_debuglevel) { \
|
||||
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define TOSTRING(x) STRINGIFY(x)
|
||||
#define LINE_AS_STRING TOSTRING(__LINE__)
|
||||
|
||||
# define DEBUGLOG(l, ...) \
|
||||
do { \
|
||||
if (l<=g_debuglevel) { \
|
||||
ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
|
||||
ZSTD_DEBUG_PRINT(" \n"); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
# define RAWLOG(l, ...) {} /* disabled */
|
||||
# define DEBUGLOG(l, ...) {} /* disabled */
|
||||
# define RAWLOG(l, ...) do { } while (0) /* disabled */
|
||||
# define DEBUGLOG(l, ...) do { } while (0) /* disabled */
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/* ******************************************************************
|
||||
* Common functions of New Generation Entropy library
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -19,8 +19,8 @@
|
|||
#include "error_private.h" /* ERR_*, ERROR */
|
||||
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
|
||||
#include "huf.h"
|
||||
#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */
|
||||
|
||||
|
||||
/*=== Version ===*/
|
||||
|
|
@ -38,8 +38,9 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
|
|||
/*-**************************************************************
|
||||
* FSE NCount encoding-decoding
|
||||
****************************************************************/
|
||||
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) headerBuffer;
|
||||
const BYTE* const iend = istart + hbSize;
|
||||
|
|
@ -50,23 +51,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||
U32 bitStream;
|
||||
int bitCount;
|
||||
unsigned charnum = 0;
|
||||
unsigned const maxSV1 = *maxSVPtr + 1;
|
||||
int previous0 = 0;
|
||||
|
||||
if (hbSize < 4) {
|
||||
/* This function only works when hbSize >= 4 */
|
||||
char buffer[4];
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
memcpy(buffer, headerBuffer, hbSize);
|
||||
if (hbSize < 8) {
|
||||
/* This function only works when hbSize >= 8 */
|
||||
char buffer[8] = {0};
|
||||
ZSTD_memcpy(buffer, headerBuffer, hbSize);
|
||||
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
|
||||
buffer, sizeof(buffer));
|
||||
if (FSE_isError(countSize)) return countSize;
|
||||
if (countSize > hbSize) return ERROR(corruption_detected);
|
||||
return countSize;
|
||||
} }
|
||||
assert(hbSize >= 4);
|
||||
assert(hbSize >= 8);
|
||||
|
||||
/* init */
|
||||
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
|
||||
ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
|
||||
bitStream = MEM_readLE32(ip);
|
||||
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
|
||||
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
|
||||
|
|
@ -77,36 +78,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||
threshold = 1<<nbBits;
|
||||
nbBits++;
|
||||
|
||||
while ((remaining>1) & (charnum<=*maxSVPtr)) {
|
||||
for (;;) {
|
||||
if (previous0) {
|
||||
unsigned n0 = charnum;
|
||||
while ((bitStream & 0xFFFF) == 0xFFFF) {
|
||||
n0 += 24;
|
||||
if (ip < iend-5) {
|
||||
ip += 2;
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
/* Count the number of repeats. Each time the
|
||||
* 2-bit repeat code is 0b11 there is another
|
||||
* repeat.
|
||||
* Avoid UB by setting the high bit to 1.
|
||||
*/
|
||||
int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
|
||||
while (repeats >= 12) {
|
||||
charnum += 3 * 12;
|
||||
if (LIKELY(ip <= iend-7)) {
|
||||
ip += 3;
|
||||
} else {
|
||||
bitStream >>= 16;
|
||||
bitCount += 16;
|
||||
} }
|
||||
while ((bitStream & 3) == 3) {
|
||||
n0 += 3;
|
||||
bitStream >>= 2;
|
||||
bitCount += 2;
|
||||
bitCount -= (int)(8 * (iend - 7 - ip));
|
||||
bitCount &= 31;
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
|
||||
}
|
||||
n0 += bitStream & 3;
|
||||
charnum += 3 * repeats;
|
||||
bitStream >>= 2 * repeats;
|
||||
bitCount += 2 * repeats;
|
||||
|
||||
/* Add the final repeat which isn't 0b11. */
|
||||
assert((bitStream & 3) < 3);
|
||||
charnum += bitStream & 3;
|
||||
bitCount += 2;
|
||||
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
|
||||
while (charnum < n0) normalizedCounter[charnum++] = 0;
|
||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
|
||||
/* This is an error, but break and return an error
|
||||
* at the end, because returning out of a loop makes
|
||||
* it harder for the compiler to optimize.
|
||||
*/
|
||||
if (charnum >= maxSV1) break;
|
||||
|
||||
/* We don't need to set the normalized count to 0
|
||||
* because we already memset the whole buffer to 0.
|
||||
*/
|
||||
|
||||
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
assert((bitCount >> 3) <= 3); /* For first condition to work */
|
||||
ip += bitCount>>3;
|
||||
bitCount &= 7;
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
} else {
|
||||
bitStream >>= 2;
|
||||
} }
|
||||
{ int const max = (2*threshold-1) - remaining;
|
||||
bitCount -= (int)(8 * (iend - 4 - ip));
|
||||
bitCount &= 31;
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
}
|
||||
{
|
||||
int const max = (2*threshold-1) - remaining;
|
||||
int count;
|
||||
|
||||
if ((bitStream & (threshold-1)) < (U32)max) {
|
||||
|
|
@ -119,24 +142,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||
}
|
||||
|
||||
count--; /* extra accuracy */
|
||||
remaining -= count < 0 ? -count : count; /* -1 means +1 */
|
||||
/* When it matters (small blocks), this is a
|
||||
* predictable branch, because we don't use -1.
|
||||
*/
|
||||
if (count >= 0) {
|
||||
remaining -= count;
|
||||
} else {
|
||||
assert(count == -1);
|
||||
remaining += count;
|
||||
}
|
||||
normalizedCounter[charnum++] = (short)count;
|
||||
previous0 = !count;
|
||||
while (remaining < threshold) {
|
||||
nbBits--;
|
||||
threshold >>= 1;
|
||||
}
|
||||
|
||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
assert(threshold > 1);
|
||||
if (remaining < threshold) {
|
||||
/* This branch can be folded into the
|
||||
* threshold update condition because we
|
||||
* know that threshold > 1.
|
||||
*/
|
||||
if (remaining <= 1) break;
|
||||
nbBits = ZSTD_highbit32(remaining) + 1;
|
||||
threshold = 1 << (nbBits - 1);
|
||||
}
|
||||
if (charnum >= maxSV1) break;
|
||||
|
||||
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||
ip += bitCount>>3;
|
||||
bitCount &= 7;
|
||||
} else {
|
||||
bitCount -= (int)(8 * (iend - 4 - ip));
|
||||
bitCount &= 31;
|
||||
ip = iend - 4;
|
||||
}
|
||||
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
|
||||
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
} }
|
||||
if (remaining != 1) return ERROR(corruption_detected);
|
||||
/* Only possible when there are too many zeros. */
|
||||
if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
|
||||
if (bitCount > 32) return ERROR(corruption_detected);
|
||||
*maxSVPtr = charnum-1;
|
||||
|
||||
|
|
@ -144,6 +186,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||
return ip-istart;
|
||||
}
|
||||
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static size_t FSE_readNCount_body_default(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t FSE_readNCount_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize, int bmi2)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
|
||||
}
|
||||
|
||||
size_t FSE_readNCount(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
|
||||
/*! HUF_readStats() :
|
||||
Read compact Huffman tree, saved by HUF_writeCTable().
|
||||
|
|
@ -155,6 +234,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
||||
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize,
|
||||
int bmi2)
|
||||
{
|
||||
U32 weightTotal;
|
||||
const BYTE* ip = (const BYTE*) src;
|
||||
|
|
@ -163,7 +253,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|||
|
||||
if (!srcSize) return ERROR(srcSize_wrong);
|
||||
iSize = ip[0];
|
||||
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
|
||||
/* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
|
||||
|
||||
if (iSize >= 128) { /* special header */
|
||||
oSize = iSize - 127;
|
||||
|
|
@ -177,31 +267,31 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|||
huffWeight[n+1] = ip[n/2] & 15;
|
||||
} } }
|
||||
else { /* header compressed with FSE (normal case) */
|
||||
FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
|
||||
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
|
||||
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
|
||||
/* max (hwSize-1) values decoded, as last one is implied */
|
||||
oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
|
||||
if (FSE_isError(oSize)) return oSize;
|
||||
}
|
||||
|
||||
/* collect weight stats */
|
||||
memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
|
||||
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
|
||||
weightTotal = 0;
|
||||
{ U32 n; for (n=0; n<oSize; n++) {
|
||||
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
rankStats[huffWeight[n]]++;
|
||||
weightTotal += (1 << huffWeight[n]) >> 1;
|
||||
} }
|
||||
if (weightTotal == 0) return ERROR(corruption_detected);
|
||||
|
||||
/* get last non-null symbol weight (implied, total must be 2^n) */
|
||||
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
|
||||
{ U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
|
||||
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
*tableLogPtr = tableLog;
|
||||
/* determine last weight */
|
||||
{ U32 const total = 1 << tableLog;
|
||||
U32 const rest = total - weightTotal;
|
||||
U32 const verif = 1 << BIT_highbit32(rest);
|
||||
U32 const lastWeight = BIT_highbit32(rest) + 1;
|
||||
U32 const verif = 1 << ZSTD_highbit32(rest);
|
||||
U32 const lastWeight = ZSTD_highbit32(rest) + 1;
|
||||
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
|
||||
huffWeight[oSize] = (BYTE)lastWeight;
|
||||
rankStats[lastWeight]++;
|
||||
|
|
@ -214,3 +304,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|||
*nbSymbolsPtr = (U32)(oSize+1);
|
||||
return iSize+1;
|
||||
}
|
||||
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize,
|
||||
int flags)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (flags & HUF_flags_bmi2) {
|
||||
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
|
||||
}
|
||||
#endif
|
||||
(void)flags;
|
||||
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -27,9 +27,11 @@ const char* ERR_getErrorString(ERR_enum code)
|
|||
case PREFIX(version_unsupported): return "Version not supported";
|
||||
case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
|
||||
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
|
||||
case PREFIX(corruption_detected): return "Corrupted block detected";
|
||||
case PREFIX(corruption_detected): return "Data corruption detected";
|
||||
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
|
||||
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
|
||||
case PREFIX(parameter_unsupported): return "Unsupported parameter";
|
||||
case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
|
||||
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
|
||||
case PREFIX(init_missing): return "Context should be init first";
|
||||
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
|
||||
|
|
@ -38,16 +40,22 @@ const char* ERR_getErrorString(ERR_enum code)
|
|||
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
|
||||
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
|
||||
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
|
||||
case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
|
||||
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
|
||||
case PREFIX(dictionary_wrong): return "Dictionary mismatch";
|
||||
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
|
||||
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
|
||||
case PREFIX(srcSize_wrong): return "Src size is incorrect";
|
||||
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
|
||||
case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
|
||||
case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
|
||||
/* following error codes are not stable and may be removed or changed in a future version */
|
||||
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
|
||||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
|
||||
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
|
||||
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
|
||||
case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
|
||||
case PREFIX(externalSequences_invalid): return "External sequences are not valid";
|
||||
case PREFIX(maxCode):
|
||||
default: return notErrorCode;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -21,8 +21,10 @@ extern "C" {
|
|||
/* ****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_errors.h" /* enum list */
|
||||
#include "compiler.h"
|
||||
#include "debug.h"
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
/* ****************************************
|
||||
|
|
@ -58,8 +60,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
|
|||
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
|
||||
|
||||
/* check and forward error code */
|
||||
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
|
||||
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
|
||||
#define CHECK_V_F(e, f) \
|
||||
size_t const e = f; \
|
||||
do { \
|
||||
if (ERR_isError(e)) \
|
||||
return e; \
|
||||
} while (0)
|
||||
#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0)
|
||||
|
||||
|
||||
/*-****************************************
|
||||
|
|
@ -73,6 +80,87 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
|
|||
return ERR_getErrorString(ERR_getErrorCode(code));
|
||||
}
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* This is a helper function to help force C99-correctness during compilation.
|
||||
* Under strict compilation modes, variadic macro arguments can't be empty.
|
||||
* However, variadic function arguments can be. Using a function therefore lets
|
||||
* us statically check that at least one (string) argument was passed,
|
||||
* independent of the compilation flags.
|
||||
*/
|
||||
static INLINE_KEYWORD UNUSED_ATTR
|
||||
void _force_has_format_string(const char *format, ...) {
|
||||
(void)format;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* We want to force this function invocation to be syntactically correct, but
|
||||
* we don't want to force runtime evaluation of its arguments.
|
||||
*/
|
||||
#define _FORCE_HAS_FORMAT_STRING(...) \
|
||||
do { \
|
||||
if (0) { \
|
||||
_force_has_format_string(__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ERR_QUOTE(str) #str
|
||||
|
||||
/**
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
* In order to do that (particularly, printing the conditional that failed),
|
||||
* this can't just wrap RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
do { \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0)
|
||||
|
||||
/**
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* FSE : Finite State Entropy codec
|
||||
* Public Prototypes declaration
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -23,7 +23,7 @@ extern "C" {
|
|||
/*-*****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include "zstd_deps.h" /* size_t, ptrdiff_t */
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
|
|
@ -53,34 +53,6 @@ extern "C" {
|
|||
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* FSE simple functions
|
||||
******************************************/
|
||||
/*! FSE_compress() :
|
||||
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
|
||||
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
|
||||
@return : size of compressed data (<= dstCapacity).
|
||||
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
|
||||
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
|
||||
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/*! FSE_decompress():
|
||||
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
|
||||
into already allocated destination buffer 'dst', of size 'dstCapacity'.
|
||||
@return : size of regenerated data (<= maxDstSize),
|
||||
or an error code, which can be tested using FSE_isError() .
|
||||
|
||||
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
|
||||
Why ? : making this distinction requires a header.
|
||||
Header management is intentionally delegated to the user layer, which can better manage special cases.
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
|
||||
const void* cSrc, size_t cSrcSize);
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
* Tool functions
|
||||
******************************************/
|
||||
|
|
@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
|
|||
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
* FSE advanced functions
|
||||
******************************************/
|
||||
/*! FSE_compress2() :
|
||||
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
|
||||
Both parameters can be defined as '0' to mean : use default value
|
||||
@return : size of compressed data
|
||||
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
|
||||
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
|
||||
if FSE_isError(return), it's an error code.
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
|
||||
/*-*****************************************
|
||||
* FSE detailed API
|
||||
******************************************/
|
||||
|
|
@ -137,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
|
|||
/*! FSE_normalizeCount():
|
||||
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
|
||||
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
|
||||
useLowProbCount is a boolean parameter which trades off compressed size for
|
||||
faster header decoding. When it is set to 1, the compressed data will be slightly
|
||||
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
|
||||
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
|
||||
is a good default, since header deserialization makes a big speed difference.
|
||||
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
|
||||
@return : tableLog,
|
||||
or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
|
||||
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
|
||||
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
|
||||
|
||||
/*! FSE_NCountWriteBound():
|
||||
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
|
||||
|
|
@ -158,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
|||
/*! Constructor and Destructor of FSE_CTable.
|
||||
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
|
||||
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
|
||||
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
|
||||
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
|
||||
|
||||
/*! FSE_buildCTable():
|
||||
Builds `ct`, which must be already allocated, using FSE_createCTable().
|
||||
|
|
@ -228,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
|
|||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
||||
const void* rBuffer, size_t rBuffSize);
|
||||
|
||||
/*! Constructor and Destructor of FSE_DTable.
|
||||
Note that its size depends on 'tableLog' */
|
||||
/*! FSE_readNCount_bmi2():
|
||||
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
|
||||
*/
|
||||
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
|
||||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
||||
const void* rBuffer, size_t rBuffSize, int bmi2);
|
||||
|
||||
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
|
||||
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
|
||||
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
|
||||
|
||||
/*! FSE_buildDTable():
|
||||
Builds 'dt', which must be already allocated, using FSE_createDTable().
|
||||
return : 0, or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
/*! FSE_decompress_usingDTable():
|
||||
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
|
||||
into `dst` which must be already allocated.
|
||||
@return : size of regenerated data (necessarily <= `dstCapacity`),
|
||||
or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
|
||||
|
||||
/*!
|
||||
Tutorial :
|
||||
|
|
@ -276,6 +229,7 @@ If there is an error, the function will return an error code, which can be teste
|
|||
|
||||
#endif /* FSE_H */
|
||||
|
||||
|
||||
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
|
||||
#define FSE_H_FSE_STATIC_LINKING_ONLY
|
||||
|
||||
|
|
@ -288,12 +242,12 @@ If there is an error, the function will return an error code, which can be teste
|
|||
*******************************************/
|
||||
/* FSE buffer bounds */
|
||||
#define FSE_NCOUNTBOUND 512
|
||||
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
||||
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
||||
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
||||
|
||||
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
|
||||
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
|
||||
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
|
||||
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
|
||||
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
|
||||
|
||||
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
|
||||
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
|
||||
|
|
@ -307,33 +261,28 @@ If there is an error, the function will return an error code, which can be teste
|
|||
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
|
||||
/**< same as FSE_optimalTableLog(), which used `minus==2` */
|
||||
|
||||
/* FSE_compress_wksp() :
|
||||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
|
||||
*/
|
||||
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
|
||||
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
|
||||
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
|
||||
|
||||
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
||||
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
|
||||
|
||||
/* FSE_buildCTable_wksp() :
|
||||
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` must be >= `(1<<tableLog)`.
|
||||
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
|
||||
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
|
||||
*/
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
|
||||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
|
||||
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
|
||||
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
|
||||
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
|
||||
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
|
||||
|
||||
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
|
||||
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
|
||||
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
|
||||
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
|
||||
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
|
||||
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
|
||||
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
|
||||
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
|
||||
|
||||
typedef enum {
|
||||
FSE_repeat_none, /**< Cannot use the previous table */
|
||||
|
|
@ -516,20 +465,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
|
|||
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
|
||||
const U16* const stateTable = (const U16*)(statePtr->stateTable);
|
||||
U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
|
||||
BIT_addBits(bitC, statePtr->value, nbBitsOut);
|
||||
BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut);
|
||||
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
|
||||
}
|
||||
|
||||
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
|
||||
{
|
||||
BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
|
||||
BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
|
||||
BIT_flushBits(bitC);
|
||||
}
|
||||
|
||||
|
||||
/* FSE_getMaxNbBits() :
|
||||
* Approximate maximum cost of a symbol, in bits.
|
||||
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
||||
* Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
||||
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
||||
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
|
||||
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
|
||||
|
|
@ -644,6 +593,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|||
#ifndef FSE_DEFAULT_MEMORY_USAGE
|
||||
# define FSE_DEFAULT_MEMORY_USAGE 13
|
||||
#endif
|
||||
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
|
||||
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
|
||||
#endif
|
||||
|
||||
/*!FSE_MAX_SYMBOL_VALUE :
|
||||
* Maximum symbol value authorized.
|
||||
|
|
@ -677,7 +629,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|||
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
|
||||
#endif
|
||||
|
||||
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
|
||||
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
|
||||
|
||||
|
||||
#endif /* FSE_STATIC_LINKING_ONLY */
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/* ******************************************************************
|
||||
* FSE : Finite State Entropy encoder
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -15,8 +15,6 @@
|
|||
/* **************************************************************
|
||||
* Includes
|
||||
****************************************************************/
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include "compiler.h"
|
||||
#include "mem.h" /* U32, U16, etc. */
|
||||
#include "debug.h" /* assert, DEBUGLOG */
|
||||
|
|
@ -25,6 +23,10 @@
|
|||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#include "error_private.h"
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#define ZSTD_DEPS_NEED_MATH64
|
||||
#include "zstd_deps.h" /* ZSTD_memset */
|
||||
#include "bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
|
|
@ -74,41 +76,85 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
|
||||
U32 const maxSV1 = maxSymbolValue+1;
|
||||
|
||||
U16* cumul = (U16*)workSpace; /* size = maxSV1 */
|
||||
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
|
||||
|
||||
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
|
||||
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
/* CTable header */
|
||||
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
tableU16[-2] = (U16) tableLog;
|
||||
tableU16[-1] = (U16) maxSymbolValue;
|
||||
assert(tableLog < 16); /* required for threshold strategy to work */
|
||||
|
||||
/* For explanations on how to distribute symbol values over the table :
|
||||
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
||||
* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
||||
|
||||
#ifdef __clang_analyzer__
|
||||
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
||||
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
||||
#endif
|
||||
|
||||
/* symbol start positions */
|
||||
{ U32 u;
|
||||
cumul[0] = 0;
|
||||
for (u=1; u <= maxSymbolValue+1; u++) {
|
||||
for (u=1; u <= maxSV1; u++) {
|
||||
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
||||
cumul[u] = cumul[u-1] + 1;
|
||||
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
||||
} else {
|
||||
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
|
||||
assert(normalizedCounter[u-1] >= 0);
|
||||
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
|
||||
assert(cumul[u] >= cumul[u-1]); /* no overflow */
|
||||
} }
|
||||
cumul[maxSymbolValue+1] = tableSize+1;
|
||||
cumul[maxSV1] = (U16)(tableSize+1);
|
||||
}
|
||||
|
||||
/* Spread symbols */
|
||||
{ U32 position = 0;
|
||||
if (highThreshold == tableSize - 1) {
|
||||
/* Case for no low prob count symbols. Lay down 8 bytes at a time
|
||||
* to reduce branch misses since we are operating on a small block
|
||||
*/
|
||||
BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
|
||||
{ U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
U32 s;
|
||||
for (s=0; s<maxSV1; ++s, sv += add) {
|
||||
int i;
|
||||
int const n = normalizedCounter[s];
|
||||
MEM_write64(spread + pos, sv);
|
||||
for (i = 8; i < n; i += 8) {
|
||||
MEM_write64(spread + pos + i, sv);
|
||||
}
|
||||
assert(n>=0);
|
||||
pos += (size_t)n;
|
||||
}
|
||||
}
|
||||
/* Spread symbols across the table. Lack of lowprob symbols means that
|
||||
* we don't need variable sized inner loop, so we can unroll the loop and
|
||||
* reduce branch misses.
|
||||
*/
|
||||
{ size_t position = 0;
|
||||
size_t s;
|
||||
size_t const unroll = 2; /* Experimentally determined optimal unroll */
|
||||
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
||||
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
||||
size_t u;
|
||||
for (u = 0; u < unroll; ++u) {
|
||||
size_t const uPosition = (position + (u * step)) & tableMask;
|
||||
tableSymbol[uPosition] = spread[s + u];
|
||||
}
|
||||
position = (position + (unroll * step)) & tableMask;
|
||||
}
|
||||
assert(position == 0); /* Must have initialized all positions */
|
||||
}
|
||||
} else {
|
||||
U32 position = 0;
|
||||
U32 symbol;
|
||||
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
||||
for (symbol=0; symbol<maxSV1; symbol++) {
|
||||
int nbOccurrences;
|
||||
int const freq = normalizedCounter[symbol];
|
||||
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
||||
|
|
@ -117,7 +163,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|||
while (position > highThreshold)
|
||||
position = (position + step) & tableMask; /* Low proba area */
|
||||
} }
|
||||
|
||||
assert(position==0); /* Must have initialized all positions */
|
||||
}
|
||||
|
||||
|
|
@ -141,16 +186,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|||
case -1:
|
||||
case 1:
|
||||
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
|
||||
symbolTT[s].deltaFindState = total - 1;
|
||||
assert(total <= INT_MAX);
|
||||
symbolTT[s].deltaFindState = (int)(total - 1);
|
||||
total ++;
|
||||
break;
|
||||
default :
|
||||
{
|
||||
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
|
||||
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
|
||||
assert(normalizedCounter[s] > 1);
|
||||
{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
|
||||
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
|
||||
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
|
||||
symbolTT[s].deltaFindState = total - normalizedCounter[s];
|
||||
total += normalizedCounter[s];
|
||||
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
|
||||
total += (unsigned)normalizedCounter[s];
|
||||
} } } }
|
||||
|
||||
#if 0 /* debug : symbol costs */
|
||||
|
|
@ -161,31 +207,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|||
symbol, normalizedCounter[symbol],
|
||||
FSE_getMaxNbBits(symbolTT, symbol),
|
||||
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
|
||||
}
|
||||
}
|
||||
} }
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
|
||||
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifndef FSE_COMMONDEFS_ONLY
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* FSE NCount encoding
|
||||
****************************************************************/
|
||||
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
|
||||
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
|
||||
+ 4 /* bitCount initialized at 4 */
|
||||
+ 2 /* first two symbols may use one additional bit each */) / 8)
|
||||
+ 1 /* round up to whole nb bytes */
|
||||
+ 2 /* additional two bytes for bitstream flush */;
|
||||
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
||||
}
|
||||
|
||||
|
|
@ -214,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|||
/* Init */
|
||||
remaining = tableSize+1; /* +1 for extra accuracy */
|
||||
threshold = tableSize;
|
||||
nbBits = tableLog+1;
|
||||
nbBits = (int)tableLog+1;
|
||||
|
||||
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
|
||||
if (previousIs0) {
|
||||
|
|
@ -233,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|||
}
|
||||
while (symbol >= start+3) {
|
||||
start+=3;
|
||||
bitStream += 3 << bitCount;
|
||||
bitStream += 3U << bitCount;
|
||||
bitCount += 2;
|
||||
}
|
||||
bitStream += (symbol-start) << bitCount;
|
||||
|
|
@ -253,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|||
count++; /* +1 for extra accuracy */
|
||||
if (count>=threshold)
|
||||
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
|
||||
bitStream += count << bitCount;
|
||||
bitStream += (U32)count << bitCount;
|
||||
bitCount += nbBits;
|
||||
bitCount -= (count<max);
|
||||
previousIs0 = (count==1);
|
||||
|
|
@ -281,7 +322,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|||
out[1] = (BYTE)(bitStream>>8);
|
||||
out+= (bitCount+7) /8;
|
||||
|
||||
return (out-ostart);
|
||||
assert(out >= ostart);
|
||||
return (size_t)(out-ostart);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -302,21 +344,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
|||
* FSE Compression Code
|
||||
****************************************************************/
|
||||
|
||||
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
size_t size;
|
||||
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
||||
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
||||
return (FSE_CTable*)malloc(size);
|
||||
}
|
||||
|
||||
void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
|
||||
|
||||
/* provides the minimum logSize to safely represent a distribution */
|
||||
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
||||
{
|
||||
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
|
||||
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
|
||||
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
|
||||
U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
|
||||
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
|
||||
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
||||
return minBits;
|
||||
|
|
@ -324,7 +356,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
|||
|
||||
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
|
||||
{
|
||||
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
|
||||
U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
|
||||
U32 tableLog = maxTableLog;
|
||||
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
|
||||
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
||||
|
|
@ -341,11 +373,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
|
|||
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
|
||||
}
|
||||
|
||||
|
||||
/* Secondary normalization method.
|
||||
To be used when primary method fails. */
|
||||
|
||||
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
|
||||
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
|
||||
{
|
||||
short const NOT_YET_ASSIGNED = -2;
|
||||
U32 s;
|
||||
|
|
@ -362,7 +393,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|||
continue;
|
||||
}
|
||||
if (count[s] <= lowThreshold) {
|
||||
norm[s] = -1;
|
||||
norm[s] = lowProbCount;
|
||||
distributed++;
|
||||
total -= count[s];
|
||||
continue;
|
||||
|
|
@ -414,7 +445,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|||
|
||||
{ U64 const vStepLog = 62 - tableLog;
|
||||
U64 const mid = (1ULL << (vStepLog-1)) - 1;
|
||||
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
|
||||
U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
|
||||
U64 tmpTotal = mid;
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
if (norm[s]==NOT_YET_ASSIGNED) {
|
||||
|
|
@ -431,10 +462,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
const unsigned* count, size_t total,
|
||||
unsigned maxSymbolValue)
|
||||
unsigned maxSymbolValue, unsigned useLowProbCount)
|
||||
{
|
||||
/* Sanity checks */
|
||||
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
|
|
@ -443,8 +473,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|||
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
|
||||
|
||||
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
|
||||
short const lowProbCount = useLowProbCount ? -1 : 1;
|
||||
U64 const scale = 62 - tableLog;
|
||||
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
|
||||
U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
|
||||
U64 const vStep = 1ULL<<(scale-20);
|
||||
int stillToDistribute = 1<<tableLog;
|
||||
unsigned s;
|
||||
|
|
@ -456,7 +487,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|||
if (count[s] == total) return 0; /* rle special case */
|
||||
if (count[s] == 0) { normalizedCounter[s]=0; continue; }
|
||||
if (count[s] <= lowThreshold) {
|
||||
normalizedCounter[s] = -1;
|
||||
normalizedCounter[s] = lowProbCount;
|
||||
stillToDistribute--;
|
||||
} else {
|
||||
short proba = (short)((count[s]*step) >> scale);
|
||||
|
|
@ -470,7 +501,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|||
} }
|
||||
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
|
||||
/* corner case, need another normalization method */
|
||||
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
|
||||
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
|
||||
if (FSE_isError(errorCode)) return errorCode;
|
||||
}
|
||||
else normalizedCounter[largest] += (short)stillToDistribute;
|
||||
|
|
@ -493,40 +524,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|||
return tableLog;
|
||||
}
|
||||
|
||||
|
||||
/* fake FSE_CTable, for raw (uncompressed) input */
|
||||
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
|
||||
{
|
||||
const unsigned tableSize = 1 << nbBits;
|
||||
const unsigned tableMask = tableSize - 1;
|
||||
const unsigned maxSymbolValue = tableMask;
|
||||
void* const ptr = ct;
|
||||
U16* const tableU16 = ( (U16*) ptr) + 2;
|
||||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
unsigned s;
|
||||
|
||||
/* Sanity checks */
|
||||
if (nbBits < 1) return ERROR(GENERIC); /* min size */
|
||||
|
||||
/* header */
|
||||
tableU16[-2] = (U16) nbBits;
|
||||
tableU16[-1] = (U16) maxSymbolValue;
|
||||
|
||||
/* Build table */
|
||||
for (s=0; s<tableSize; s++)
|
||||
tableU16[s] = (U16)(tableSize + s);
|
||||
|
||||
/* Build Symbol Transformation Table */
|
||||
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
symbolTT[s].deltaNbBits = deltaNbBits;
|
||||
symbolTT[s].deltaFindState = s-1;
|
||||
} }
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fake FSE_CTable, for rle input (always same symbol) */
|
||||
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
|
||||
{
|
||||
|
|
@ -625,74 +622,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
|
|||
|
||||
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
|
||||
|
||||
/* FSE_compress_wksp() :
|
||||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` size must be `(1<<tableLog)`.
|
||||
*/
|
||||
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* op = ostart;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
|
||||
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
|
||||
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
|
||||
FSE_CTable* CTable = (FSE_CTable*)workSpace;
|
||||
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
|
||||
void* scratchBuffer = (void*)(CTable + CTableSize);
|
||||
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
|
||||
|
||||
/* init conditions */
|
||||
if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
|
||||
if (srcSize <= 1) return 0; /* Not compressible */
|
||||
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
||||
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
|
||||
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
||||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
||||
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
||||
}
|
||||
|
||||
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
|
||||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
|
||||
|
||||
/* Write table description header */
|
||||
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
|
||||
op += nc_err;
|
||||
}
|
||||
|
||||
/* Compress */
|
||||
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
|
||||
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
|
||||
if (cSize == 0) return 0; /* not enough space for compressed data */
|
||||
op += cSize;
|
||||
}
|
||||
|
||||
/* check compressibility */
|
||||
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
|
||||
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
||||
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
|
||||
} fseWkspMax_t;
|
||||
|
||||
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
fseWkspMax_t scratchBuffer;
|
||||
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
||||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
||||
}
|
||||
|
||||
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
|
||||
}
|
||||
|
||||
|
||||
#endif /* FSE_COMMONDEFS_ONLY */
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/* ******************************************************************
|
||||
* FSE : Finite State Entropy decoder
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -16,13 +16,14 @@
|
|||
/* **************************************************************
|
||||
* Includes
|
||||
****************************************************************/
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include "debug.h" /* assert */
|
||||
#include "bitstream.h"
|
||||
#include "compiler.h"
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#include "error_private.h"
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy */
|
||||
#include "bits.h" /* ZSTD_highbit32 */
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
|
|
@ -54,30 +55,19 @@
|
|||
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
|
||||
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
|
||||
|
||||
|
||||
/* Function templates */
|
||||
FSE_DTable* FSE_createDTable (unsigned tableLog)
|
||||
{
|
||||
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
||||
return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
|
||||
}
|
||||
|
||||
void FSE_freeDTable (FSE_DTable* dt)
|
||||
{
|
||||
free(dt);
|
||||
}
|
||||
|
||||
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
||||
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
|
||||
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
|
||||
U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
|
||||
U16* symbolNext = (U16*)workSpace;
|
||||
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
|
||||
|
||||
U32 const maxSV1 = maxSymbolValue + 1;
|
||||
U32 const tableSize = 1 << tableLog;
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
/* Sanity Checks */
|
||||
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
|
||||
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
|
||||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
|
||||
|
|
@ -93,13 +83,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
|||
symbolNext[s] = 1;
|
||||
} else {
|
||||
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
|
||||
symbolNext[s] = normalizedCounter[s];
|
||||
symbolNext[s] = (U16)normalizedCounter[s];
|
||||
} } }
|
||||
memcpy(dt, &DTableH, sizeof(DTableH));
|
||||
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
||||
}
|
||||
|
||||
/* Spread symbols */
|
||||
{ U32 const tableMask = tableSize-1;
|
||||
if (highThreshold == tableSize - 1) {
|
||||
size_t const tableMask = tableSize-1;
|
||||
size_t const step = FSE_TABLESTEP(tableSize);
|
||||
/* First lay down the symbols in order.
|
||||
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
||||
* misses since small blocks generally have small table logs, so nearly
|
||||
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
||||
* our buffer to handle the over-write.
|
||||
*/
|
||||
{ U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
U32 s;
|
||||
for (s=0; s<maxSV1; ++s, sv += add) {
|
||||
int i;
|
||||
int const n = normalizedCounter[s];
|
||||
MEM_write64(spread + pos, sv);
|
||||
for (i = 8; i < n; i += 8) {
|
||||
MEM_write64(spread + pos + i, sv);
|
||||
}
|
||||
pos += (size_t)n;
|
||||
} }
|
||||
/* Now we spread those positions across the table.
|
||||
* The benefit of doing it in two stages is that we avoid the
|
||||
* variable size inner loop, which caused lots of branch misses.
|
||||
* Now we can run through all the positions without any branch misses.
|
||||
* We unroll the loop twice, since that is what empirically worked best.
|
||||
*/
|
||||
{
|
||||
size_t position = 0;
|
||||
size_t s;
|
||||
size_t const unroll = 2;
|
||||
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
||||
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
||||
size_t u;
|
||||
for (u = 0; u < unroll; ++u) {
|
||||
size_t const uPosition = (position + (u * step)) & tableMask;
|
||||
tableDecode[uPosition].symbol = spread[s + u];
|
||||
}
|
||||
position = (position + (unroll * step)) & tableMask;
|
||||
}
|
||||
assert(position == 0);
|
||||
}
|
||||
} else {
|
||||
U32 const tableMask = tableSize-1;
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 s, position = 0;
|
||||
for (s=0; s<maxSV1; s++) {
|
||||
|
|
@ -117,62 +151,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
|||
for (u=0; u<tableSize; u++) {
|
||||
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
|
||||
U32 const nextState = symbolNext[symbol]++;
|
||||
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
|
||||
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
||||
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
||||
} }
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
|
||||
}
|
||||
|
||||
|
||||
#ifndef FSE_COMMONDEFS_ONLY
|
||||
|
||||
/*-*******************************************************
|
||||
* Decompression (Byte symbols)
|
||||
*********************************************************/
|
||||
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
|
||||
{
|
||||
void* ptr = dt;
|
||||
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
||||
void* dPtr = dt + 1;
|
||||
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
|
||||
|
||||
DTableH->tableLog = 0;
|
||||
DTableH->fastMode = 0;
|
||||
|
||||
cell->newState = 0;
|
||||
cell->symbol = symbolValue;
|
||||
cell->nbBits = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
|
||||
{
|
||||
void* ptr = dt;
|
||||
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
||||
void* dPtr = dt + 1;
|
||||
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
|
||||
const unsigned tableSize = 1 << nbBits;
|
||||
const unsigned tableMask = tableSize - 1;
|
||||
const unsigned maxSV1 = tableMask+1;
|
||||
unsigned s;
|
||||
|
||||
/* Sanity checks */
|
||||
if (nbBits < 1) return ERROR(GENERIC); /* min size */
|
||||
|
||||
/* Build Decoding Table */
|
||||
DTableH->tableLog = (U16)nbBits;
|
||||
DTableH->fastMode = 1;
|
||||
for (s=0; s<maxSV1; s++) {
|
||||
dinfo[s].newState = 0;
|
||||
dinfo[s].symbol = (BYTE)s;
|
||||
dinfo[s].nbBits = (BYTE)nbBits;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
||||
void* dst, size_t maxDstSize,
|
||||
|
|
@ -233,54 +229,85 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
|||
break;
|
||||
} }
|
||||
|
||||
return op-ostart;
|
||||
assert(op >= ostart);
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
|
||||
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
const FSE_DTable* dt)
|
||||
{
|
||||
const void* ptr = dt;
|
||||
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
||||
const U32 fastMode = DTableH->fastMode;
|
||||
|
||||
/* select fast mode (static) */
|
||||
if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
|
||||
return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
|
||||
}
|
||||
typedef struct {
|
||||
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
|
||||
} FSE_DecompressWksp;
|
||||
|
||||
|
||||
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
|
||||
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
unsigned maxLog, void* workSpace, size_t wkspSize,
|
||||
int bmi2)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*)cSrc;
|
||||
const BYTE* ip = istart;
|
||||
short counting[FSE_MAX_SYMBOL_VALUE+1];
|
||||
unsigned tableLog;
|
||||
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
||||
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
|
||||
size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
|
||||
FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
|
||||
|
||||
FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
|
||||
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
|
||||
|
||||
/* correct offset to dtable depends on this property */
|
||||
FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
|
||||
|
||||
/* normal FSE decoding mode */
|
||||
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
|
||||
if (FSE_isError(NCountLength)) return NCountLength;
|
||||
/* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
|
||||
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
|
||||
ip += NCountLength;
|
||||
cSrcSize -= NCountLength;
|
||||
{ size_t const NCountLength =
|
||||
FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
|
||||
if (FSE_isError(NCountLength)) return NCountLength;
|
||||
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
|
||||
assert(NCountLength <= cSrcSize);
|
||||
ip += NCountLength;
|
||||
cSrcSize -= NCountLength;
|
||||
}
|
||||
|
||||
CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
|
||||
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
|
||||
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
||||
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
||||
|
||||
return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
|
||||
CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
|
||||
|
||||
{
|
||||
const void* ptr = dtable;
|
||||
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
||||
const U32 fastMode = DTableH->fastMode;
|
||||
|
||||
/* select fast mode (static) */
|
||||
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
|
||||
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
|
||||
|
||||
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
|
||||
/* Avoids the FORCE_INLINE of the _body() function. */
|
||||
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
|
||||
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
|
||||
{
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
||||
}
|
||||
|
||||
#endif /* FSE_COMMONDEFS_ONLY */
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* hist : Histogram functions
|
||||
* part of Finite State Entropy project
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -34,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
|||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
unsigned largestCount=0;
|
||||
|
||||
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
|
||||
ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
|
||||
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
||||
|
||||
while (ip<end) {
|
||||
|
|
@ -60,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
|
|||
* this design makes better use of OoO cpus,
|
||||
* and is noticeably faster when some values are heavily repeated.
|
||||
* But it needs some additional workspace for intermediate tables.
|
||||
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
|
||||
* `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
|
||||
* @return : largest histogram frequency,
|
||||
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
|
||||
* or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
|
||||
static size_t HIST_count_parallel_wksp(
|
||||
unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
|
|
@ -71,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
|
|||
{
|
||||
const BYTE* ip = (const BYTE*)source;
|
||||
const BYTE* const iend = ip+sourceSize;
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
|
||||
unsigned max=0;
|
||||
U32* const Counting1 = workSpace;
|
||||
U32* const Counting2 = Counting1 + 256;
|
||||
U32* const Counting3 = Counting2 + 256;
|
||||
U32* const Counting4 = Counting3 + 256;
|
||||
|
||||
memset(workSpace, 0, 4*256*sizeof(unsigned));
|
||||
|
||||
/* safety checks */
|
||||
assert(*maxSymbolValuePtr <= 255);
|
||||
if (!sourceSize) {
|
||||
memset(count, 0, maxSymbolValue + 1);
|
||||
ZSTD_memset(count, 0, countSize);
|
||||
*maxSymbolValuePtr = 0;
|
||||
return 0;
|
||||
}
|
||||
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
|
||||
ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
|
||||
|
||||
/* by stripes of 16 bytes */
|
||||
{ U32 cached = MEM_read32(ip); ip += 4;
|
||||
|
|
@ -118,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
|
|||
/* finish last symbols */
|
||||
while (ip<iend) Counting1[*ip++]++;
|
||||
|
||||
if (check) { /* verify stats will fit into destination table */
|
||||
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
||||
} }
|
||||
|
||||
{ U32 s;
|
||||
if (maxSymbolValue > 255) maxSymbolValue = 255;
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (count[s] > max) max = count[s];
|
||||
for (s=0; s<256; s++) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s] > max) max = Counting1[s];
|
||||
} }
|
||||
|
||||
while (!count[maxSymbolValue]) maxSymbolValue--;
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
{ unsigned maxSymbolValue = 255;
|
||||
while (!Counting1[maxSymbolValue]) maxSymbolValue--;
|
||||
if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
|
||||
}
|
||||
return (size_t)max;
|
||||
}
|
||||
|
||||
|
|
@ -152,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
|||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
|
||||
}
|
||||
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
|
||||
/* HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
|
||||
|
|
@ -175,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
|||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
|
||||
}
|
||||
|
||||
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
|
||||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* hist : Histogram functions
|
||||
* part of Finite State Entropy project
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
****************************************************************** */
|
||||
|
||||
/* --- dependencies --- */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
/* --- simple histogram functions --- */
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* ******************************************************************
|
||||
* huff0 huffman codec,
|
||||
* part of Finite State Entropy library
|
||||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
|
|
@ -20,105 +20,30 @@ extern "C" {
|
|||
#define HUF_H_298734234
|
||||
|
||||
/* *** Dependencies *** */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/* *** library symbols visibility *** */
|
||||
/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
|
||||
* HUF symbols remain "private" (internal symbols for library only).
|
||||
* Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
|
||||
#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
|
||||
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
|
||||
# define HUF_PUBLIC_API __declspec(dllexport)
|
||||
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
|
||||
# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
|
||||
#else
|
||||
# define HUF_PUBLIC_API
|
||||
#endif
|
||||
|
||||
|
||||
/* ========================== */
|
||||
/* *** simple functions *** */
|
||||
/* ========================== */
|
||||
|
||||
/** HUF_compress() :
|
||||
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
|
||||
* 'dst' buffer must be already allocated.
|
||||
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
|
||||
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
|
||||
* @return : size of compressed data (<= `dstCapacity`).
|
||||
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
|
||||
* if HUF_isError(return), compression failed (more details using HUF_getErrorName())
|
||||
*/
|
||||
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/** HUF_decompress() :
|
||||
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
|
||||
* into already allocated buffer 'dst', of minimum size 'dstSize'.
|
||||
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
|
||||
* Note : in contrast with FSE, HUF_decompress can regenerate
|
||||
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
|
||||
* because it knows size to regenerate (originalSize).
|
||||
* @return : size of regenerated data (== originalSize),
|
||||
* or an error code, which can be tested using HUF_isError()
|
||||
*/
|
||||
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
|
||||
const void* cSrc, size_t cSrcSize);
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "mem.h" /* U32 */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
|
||||
|
||||
/* *** Tool functions *** */
|
||||
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
|
||||
HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
|
||||
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
|
||||
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
|
||||
|
||||
/* Error Management */
|
||||
HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
|
||||
HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
|
||||
unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
|
||||
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
|
||||
|
||||
|
||||
/* *** Advanced function *** */
|
||||
|
||||
/** HUF_compress2() :
|
||||
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
|
||||
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
|
||||
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */
|
||||
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
/** HUF_compress4X_wksp() :
|
||||
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
|
||||
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
|
||||
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
|
||||
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
|
||||
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize);
|
||||
|
||||
#endif /* HUF_H_298734234 */
|
||||
|
||||
/* ******************************************************************
|
||||
* WARNING !!
|
||||
* The following section contains advanced and experimental definitions
|
||||
* which shall never be used in the context of a dynamic library,
|
||||
* because they are not guaranteed to remain stable in the future.
|
||||
* Only consider them in association with static linking.
|
||||
* *****************************************************************/
|
||||
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
|
||||
#define HUF_H_HUF_STATIC_LINKING_ONLY
|
||||
|
||||
/* *** Dependencies *** */
|
||||
#include "mem.h" /* U32 */
|
||||
|
||||
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
|
||||
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
|
||||
|
||||
/* *** Constants *** */
|
||||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
|
||||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
|
||||
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
|
||||
#define HUF_SYMBOLVALUE_MAX 255
|
||||
|
||||
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
|
||||
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
|
||||
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
|
||||
# error "HUF_TABLELOG_MAX is too large !"
|
||||
#endif
|
||||
|
|
@ -133,12 +58,12 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
|||
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
||||
|
||||
/* static allocation of HUF's Compression Table */
|
||||
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
|
||||
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
|
||||
typedef size_t HUF_CElt; /* consider it an incomplete type */
|
||||
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
|
||||
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
|
||||
U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
|
||||
void* name##hv = &(name##hb); \
|
||||
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
|
||||
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
|
||||
|
||||
/* static allocation of HUF's DTable */
|
||||
typedef U32 HUF_DTable;
|
||||
|
|
@ -152,25 +77,49 @@ typedef U32 HUF_DTable;
|
|||
/* ****************************************
|
||||
* Advanced decompression functions
|
||||
******************************************/
|
||||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
|
||||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
|
||||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
|
||||
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
/**
|
||||
* Huffman flags bitset.
|
||||
* For all flags, 0 is the default value.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
|
||||
* Otherwise: Ignored.
|
||||
*/
|
||||
HUF_flags_bmi2 = (1 << 0),
|
||||
/**
|
||||
* If set: Test possible table depths to find the one that produces the smallest header + encoded size.
|
||||
* If unset: Use heuristic to find the table depth.
|
||||
*/
|
||||
HUF_flags_optimalDepth = (1 << 1),
|
||||
/**
|
||||
* If set: If the previous table can encode the input, always reuse the previous table.
|
||||
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
|
||||
*/
|
||||
HUF_flags_preferRepeat = (1 << 2),
|
||||
/**
|
||||
* If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
|
||||
* If unset: Always histogram the entire input.
|
||||
*/
|
||||
HUF_flags_suspectUncompressible = (1 << 3),
|
||||
/**
|
||||
* If set: Don't use assembly implementations
|
||||
* If unset: Allow using assembly implementations
|
||||
*/
|
||||
HUF_flags_disableAsm = (1 << 4),
|
||||
/**
|
||||
* If set: Don't use the fast decoding loop, always use the fallback decoding loop.
|
||||
* If unset: Use the fast decoding loop when possible.
|
||||
*/
|
||||
HUF_flags_disableFast = (1 << 5)
|
||||
} HUF_flags_e;
|
||||
|
||||
|
||||
/* ****************************************
|
||||
* HUF detailed API
|
||||
* ****************************************/
|
||||
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
|
||||
|
||||
/*! HUF_compress() does the following:
|
||||
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
|
||||
|
|
@ -183,11 +132,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|||
* For example, it's possible to compress several blocks using the same 'CTable',
|
||||
* or to save and regenerate 'CTable' using external methods.
|
||||
*/
|
||||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
|
||||
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
|
||||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
|
||||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
unsigned HUF_minTableLog(unsigned symbolCardinality);
|
||||
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
|
||||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
|
||||
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
|
||||
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
|
||||
|
|
@ -196,22 +146,24 @@ typedef enum {
|
|||
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
|
||||
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
|
||||
} HUF_repeat;
|
||||
|
||||
/** HUF_compress4X_repeat() :
|
||||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
* If preferRepeat then the old table will always be used if valid.
|
||||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
|
||||
|
||||
/** HUF_buildCTable_wksp() :
|
||||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
|
||||
*/
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
|
||||
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
|
||||
|
|
@ -226,15 +178,40 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
|
|||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/*! HUF_readStats_wksp() :
|
||||
* Same as HUF_readStats() but takes an external workspace which must be
|
||||
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
|
||||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
|
||||
*/
|
||||
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
|
||||
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
|
||||
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
|
||||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workspace, size_t wkspSize,
|
||||
int flags);
|
||||
|
||||
/** HUF_readCTable() :
|
||||
* Loading a CTable saved with HUF_writeCTable() */
|
||||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
|
||||
|
||||
/** HUF_getNbBits() :
|
||||
/** HUF_getNbBitsFromCTable() :
|
||||
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
|
||||
* Note 1 : is not inlined, as HUF_CElt definition is private
|
||||
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
|
||||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
|
||||
* Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
|
||||
* Note 2 : is not inlined, as HUF_CElt definition is private
|
||||
*/
|
||||
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
|
||||
|
||||
typedef struct {
|
||||
BYTE tableLog;
|
||||
BYTE maxSymbolValue;
|
||||
BYTE unused[sizeof(size_t) - 2];
|
||||
} HUF_CTableHeader;
|
||||
|
||||
/** HUF_readCTableHeader() :
|
||||
* @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
|
||||
*/
|
||||
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
|
||||
|
||||
/*
|
||||
* HUF_decompress() does the following:
|
||||
|
|
@ -260,80 +237,49 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
|
|||
* a required workspace size greater than that specified in the following
|
||||
* macro.
|
||||
*/
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
|
||||
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
|
||||
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
|
||||
|
||||
/* ====================== */
|
||||
/* single stream variants */
|
||||
/* ====================== */
|
||||
|
||||
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
|
||||
/** HUF_compress1X_repeat() :
|
||||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
* If preferRepeat then the old table will always be used if valid.
|
||||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
|
||||
|
||||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
/* BMI2 variants.
|
||||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
|
||||
*/
|
||||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#endif
|
||||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
|
||||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
|
||||
#endif
|
||||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
|
||||
#endif /* HUF_STATIC_LINKING_ONLY */
|
||||
#endif /* HUF_H_298734234 */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
595
uppsrc/plugin/zstd/lib/huf_decompress_amd64.S
Normal file
595
uppsrc/plugin/zstd/lib/huf_decompress_amd64.S
Normal file
|
|
@ -0,0 +1,595 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#include "../common/portability_macros.h"
|
||||
|
||||
#if defined(__ELF__) && defined(__GNUC__)
|
||||
/* Stack marking
|
||||
* ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
|
||||
*/
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
#if defined(__aarch64__)
|
||||
/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
|
||||
* See: https://github.com/facebook/zstd/issues/3841
|
||||
* See: https://gcc.godbolt.org/z/sqr5T4ffK
|
||||
* See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
|
||||
* See: https://reviews.llvm.org/D62609
|
||||
*/
|
||||
.pushsection .note.gnu.property, "a"
|
||||
.p2align 3
|
||||
.long 4 /* size of the name - "GNU\0" */
|
||||
.long 0x10 /* size of descriptor */
|
||||
.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */
|
||||
.asciz "GNU"
|
||||
.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
|
||||
.long 4 /* pr_datasz - 4 bytes */
|
||||
.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
|
||||
.p2align 3 /* pr_padding - bring everything to 8 byte alignment */
|
||||
.popsection
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
||||
|
||||
/* Calling convention:
|
||||
*
|
||||
* %rdi contains the first argument: HUF_DecompressAsmArgs*.
|
||||
* %rbp isn't maintained (no frame pointer).
|
||||
* %rsp contains the stack pointer that grows down.
|
||||
* No red-zone is assumed, only addresses >= %rsp are used.
|
||||
* All register contents are preserved.
|
||||
*
|
||||
* TODO: Support Windows calling convention.
|
||||
*/
|
||||
|
||||
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
|
||||
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
|
||||
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
|
||||
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
|
||||
.global HUF_decompress4X1_usingDTable_internal_fast_asm_loop
|
||||
.global HUF_decompress4X2_usingDTable_internal_fast_asm_loop
|
||||
.global _HUF_decompress4X1_usingDTable_internal_fast_asm_loop
|
||||
.global _HUF_decompress4X2_usingDTable_internal_fast_asm_loop
|
||||
.text
|
||||
|
||||
/* Sets up register mappings for clarity.
|
||||
* op[], bits[], dtable & ip[0] each get their own register.
|
||||
* ip[1,2,3] & olimit alias var[].
|
||||
* %rax is a scratch register.
|
||||
*/
|
||||
|
||||
#define op0 rsi
|
||||
#define op1 rbx
|
||||
#define op2 rcx
|
||||
#define op3 rdi
|
||||
|
||||
#define ip0 r8
|
||||
#define ip1 r9
|
||||
#define ip2 r10
|
||||
#define ip3 r11
|
||||
|
||||
#define bits0 rbp
|
||||
#define bits1 rdx
|
||||
#define bits2 r12
|
||||
#define bits3 r13
|
||||
#define dtable r14
|
||||
#define olimit r15
|
||||
|
||||
/* var[] aliases ip[1,2,3] & olimit
|
||||
* ip[1,2,3] are saved every iteration.
|
||||
* olimit is only used in compute_olimit.
|
||||
*/
|
||||
#define var0 r15
|
||||
#define var1 r9
|
||||
#define var2 r10
|
||||
#define var3 r11
|
||||
|
||||
/* 32-bit var registers */
|
||||
#define vard0 r15d
|
||||
#define vard1 r9d
|
||||
#define vard2 r10d
|
||||
#define vard3 r11d
|
||||
|
||||
/* Calls X(N) for each stream 0, 1, 2, 3. */
|
||||
#define FOR_EACH_STREAM(X) \
|
||||
X(0); \
|
||||
X(1); \
|
||||
X(2); \
|
||||
X(3)
|
||||
|
||||
/* Calls X(N, idx) for each stream 0, 1, 2, 3. */
|
||||
#define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
|
||||
X(0, idx); \
|
||||
X(1, idx); \
|
||||
X(2, idx); \
|
||||
X(3, idx)
|
||||
|
||||
/* Define both _HUF_* & HUF_* symbols because MacOS
|
||||
* C symbols are prefixed with '_' & Linux symbols aren't.
|
||||
*/
|
||||
_HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
|
||||
ZSTD_CET_ENDBRANCH
|
||||
/* Save all registers - even if they are callee saved for simplicity. */
|
||||
push %rax
|
||||
push %rbx
|
||||
push %rcx
|
||||
push %rdx
|
||||
push %rbp
|
||||
push %rsi
|
||||
push %rdi
|
||||
push %r8
|
||||
push %r9
|
||||
push %r10
|
||||
push %r11
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
/* Read HUF_DecompressAsmArgs* args from %rax */
|
||||
movq %rdi, %rax
|
||||
movq 0(%rax), %ip0
|
||||
movq 8(%rax), %ip1
|
||||
movq 16(%rax), %ip2
|
||||
movq 24(%rax), %ip3
|
||||
movq 32(%rax), %op0
|
||||
movq 40(%rax), %op1
|
||||
movq 48(%rax), %op2
|
||||
movq 56(%rax), %op3
|
||||
movq 64(%rax), %bits0
|
||||
movq 72(%rax), %bits1
|
||||
movq 80(%rax), %bits2
|
||||
movq 88(%rax), %bits3
|
||||
movq 96(%rax), %dtable
|
||||
push %rax /* argument */
|
||||
push 104(%rax) /* ilowest */
|
||||
push 112(%rax) /* oend */
|
||||
push %olimit /* olimit space */
|
||||
|
||||
subq $24, %rsp
|
||||
|
||||
.L_4X1_compute_olimit:
|
||||
/* Computes how many iterations we can do safely
|
||||
* %r15, %rax may be clobbered
|
||||
* rbx, rdx must be saved
|
||||
* op3 & ip0 mustn't be clobbered
|
||||
*/
|
||||
movq %rbx, 0(%rsp)
|
||||
movq %rdx, 8(%rsp)
|
||||
|
||||
movq 32(%rsp), %rax /* rax = oend */
|
||||
subq %op3, %rax /* rax = oend - op3 */
|
||||
|
||||
/* r15 = (oend - op3) / 5 */
|
||||
movabsq $-3689348814741910323, %rdx
|
||||
mulq %rdx
|
||||
movq %rdx, %r15
|
||||
shrq $2, %r15
|
||||
|
||||
movq %ip0, %rax /* rax = ip0 */
|
||||
movq 40(%rsp), %rdx /* rdx = ilowest */
|
||||
subq %rdx, %rax /* rax = ip0 - ilowest */
|
||||
movq %rax, %rbx /* rbx = ip0 - ilowest */
|
||||
|
||||
/* rdx = (ip0 - ilowest) / 7 */
|
||||
movabsq $2635249153387078803, %rdx
|
||||
mulq %rdx
|
||||
subq %rdx, %rbx
|
||||
shrq %rbx
|
||||
addq %rbx, %rdx
|
||||
shrq $2, %rdx
|
||||
|
||||
/* r15 = min(%rdx, %r15) */
|
||||
cmpq %rdx, %r15
|
||||
cmova %rdx, %r15
|
||||
|
||||
/* r15 = r15 * 5 */
|
||||
leaq (%r15, %r15, 4), %r15
|
||||
|
||||
/* olimit = op3 + r15 */
|
||||
addq %op3, %olimit
|
||||
|
||||
movq 8(%rsp), %rdx
|
||||
movq 0(%rsp), %rbx
|
||||
|
||||
/* If (op3 + 20 > olimit) */
|
||||
movq %op3, %rax /* rax = op3 */
|
||||
cmpq %rax, %olimit /* op3 == olimit */
|
||||
je .L_4X1_exit
|
||||
|
||||
/* If (ip1 < ip0) go to exit */
|
||||
cmpq %ip0, %ip1
|
||||
jb .L_4X1_exit
|
||||
|
||||
/* If (ip2 < ip1) go to exit */
|
||||
cmpq %ip1, %ip2
|
||||
jb .L_4X1_exit
|
||||
|
||||
/* If (ip3 < ip2) go to exit */
|
||||
cmpq %ip2, %ip3
|
||||
jb .L_4X1_exit
|
||||
|
||||
/* Reads top 11 bits from bits[n]
|
||||
* Loads dt[bits[n]] into var[n]
|
||||
*/
|
||||
#define GET_NEXT_DELT(n) \
|
||||
movq $53, %var##n; \
|
||||
shrxq %var##n, %bits##n, %var##n; \
|
||||
movzwl (%dtable,%var##n,2),%vard##n
|
||||
|
||||
/* var[n] must contain the DTable entry computed with GET_NEXT_DELT
|
||||
* Moves var[n] to %rax
|
||||
* bits[n] <<= var[n] & 63
|
||||
* op[n][idx] = %rax >> 8
|
||||
* %ah is a way to access bits [8, 16) of %rax
|
||||
*/
|
||||
#define DECODE_FROM_DELT(n, idx) \
|
||||
movq %var##n, %rax; \
|
||||
shlxq %var##n, %bits##n, %bits##n; \
|
||||
movb %ah, idx(%op##n)
|
||||
|
||||
/* Assumes GET_NEXT_DELT has been called.
|
||||
* Calls DECODE_FROM_DELT then GET_NEXT_DELT
|
||||
*/
|
||||
#define DECODE_AND_GET_NEXT(n, idx) \
|
||||
DECODE_FROM_DELT(n, idx); \
|
||||
GET_NEXT_DELT(n) \
|
||||
|
||||
/* // ctz & nbBytes is stored in bits[n]
|
||||
* // nbBits is stored in %rax
|
||||
* ctz = CTZ[bits[n]]
|
||||
* nbBits = ctz & 7
|
||||
* nbBytes = ctz >> 3
|
||||
* op[n] += 5
|
||||
* ip[n] -= nbBytes
|
||||
* // Note: x86-64 is little-endian ==> no bswap
|
||||
* bits[n] = MEM_readST(ip[n]) | 1
|
||||
* bits[n] <<= nbBits
|
||||
*/
|
||||
#define RELOAD_BITS(n) \
|
||||
bsfq %bits##n, %bits##n; \
|
||||
movq %bits##n, %rax; \
|
||||
andq $7, %rax; \
|
||||
shrq $3, %bits##n; \
|
||||
leaq 5(%op##n), %op##n; \
|
||||
subq %bits##n, %ip##n; \
|
||||
movq (%ip##n), %bits##n; \
|
||||
orq $1, %bits##n; \
|
||||
shlx %rax, %bits##n, %bits##n
|
||||
|
||||
/* Store clobbered variables on the stack */
|
||||
movq %olimit, 24(%rsp)
|
||||
movq %ip1, 0(%rsp)
|
||||
movq %ip2, 8(%rsp)
|
||||
movq %ip3, 16(%rsp)
|
||||
|
||||
/* Call GET_NEXT_DELT for each stream */
|
||||
FOR_EACH_STREAM(GET_NEXT_DELT)
|
||||
|
||||
.p2align 6
|
||||
|
||||
.L_4X1_loop_body:
|
||||
/* Decode 5 symbols in each of the 4 streams (20 total)
|
||||
* Must have called GET_NEXT_DELT for each stream
|
||||
*/
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
|
||||
|
||||
/* Load ip[1,2,3] from stack (var[] aliases them)
|
||||
* ip[] is needed for RELOAD_BITS
|
||||
* Each will be stored back to the stack after RELOAD
|
||||
*/
|
||||
movq 0(%rsp), %ip1
|
||||
movq 8(%rsp), %ip2
|
||||
movq 16(%rsp), %ip3
|
||||
|
||||
/* Reload each stream & fetch the next table entry
|
||||
* to prepare for the next iteration
|
||||
*/
|
||||
RELOAD_BITS(0)
|
||||
GET_NEXT_DELT(0)
|
||||
|
||||
RELOAD_BITS(1)
|
||||
movq %ip1, 0(%rsp)
|
||||
GET_NEXT_DELT(1)
|
||||
|
||||
RELOAD_BITS(2)
|
||||
movq %ip2, 8(%rsp)
|
||||
GET_NEXT_DELT(2)
|
||||
|
||||
RELOAD_BITS(3)
|
||||
movq %ip3, 16(%rsp)
|
||||
GET_NEXT_DELT(3)
|
||||
|
||||
/* If op3 < olimit: continue the loop */
|
||||
cmp %op3, 24(%rsp)
|
||||
ja .L_4X1_loop_body
|
||||
|
||||
/* Reload ip[1,2,3] from stack */
|
||||
movq 0(%rsp), %ip1
|
||||
movq 8(%rsp), %ip2
|
||||
movq 16(%rsp), %ip3
|
||||
|
||||
/* Re-compute olimit */
|
||||
jmp .L_4X1_compute_olimit
|
||||
|
||||
#undef GET_NEXT_DELT
|
||||
#undef DECODE_FROM_DELT
|
||||
#undef DECODE
|
||||
#undef RELOAD_BITS
|
||||
.L_4X1_exit:
|
||||
addq $24, %rsp
|
||||
|
||||
/* Restore stack (oend & olimit) */
|
||||
pop %rax /* olimit */
|
||||
pop %rax /* oend */
|
||||
pop %rax /* ilowest */
|
||||
pop %rax /* arg */
|
||||
|
||||
/* Save ip / op / bits */
|
||||
movq %ip0, 0(%rax)
|
||||
movq %ip1, 8(%rax)
|
||||
movq %ip2, 16(%rax)
|
||||
movq %ip3, 24(%rax)
|
||||
movq %op0, 32(%rax)
|
||||
movq %op1, 40(%rax)
|
||||
movq %op2, 48(%rax)
|
||||
movq %op3, 56(%rax)
|
||||
movq %bits0, 64(%rax)
|
||||
movq %bits1, 72(%rax)
|
||||
movq %bits2, 80(%rax)
|
||||
movq %bits3, 88(%rax)
|
||||
|
||||
/* Restore registers */
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
pop %r11
|
||||
pop %r10
|
||||
pop %r9
|
||||
pop %r8
|
||||
pop %rdi
|
||||
pop %rsi
|
||||
pop %rbp
|
||||
pop %rdx
|
||||
pop %rcx
|
||||
pop %rbx
|
||||
pop %rax
|
||||
ret
|
||||
|
||||
_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
|
||||
ZSTD_CET_ENDBRANCH
|
||||
/* Save all registers - even if they are callee saved for simplicity. */
|
||||
push %rax
|
||||
push %rbx
|
||||
push %rcx
|
||||
push %rdx
|
||||
push %rbp
|
||||
push %rsi
|
||||
push %rdi
|
||||
push %r8
|
||||
push %r9
|
||||
push %r10
|
||||
push %r11
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
movq %rdi, %rax
|
||||
movq 0(%rax), %ip0
|
||||
movq 8(%rax), %ip1
|
||||
movq 16(%rax), %ip2
|
||||
movq 24(%rax), %ip3
|
||||
movq 32(%rax), %op0
|
||||
movq 40(%rax), %op1
|
||||
movq 48(%rax), %op2
|
||||
movq 56(%rax), %op3
|
||||
movq 64(%rax), %bits0
|
||||
movq 72(%rax), %bits1
|
||||
movq 80(%rax), %bits2
|
||||
movq 88(%rax), %bits3
|
||||
movq 96(%rax), %dtable
|
||||
push %rax /* argument */
|
||||
push %rax /* olimit */
|
||||
push 104(%rax) /* ilowest */
|
||||
|
||||
movq 112(%rax), %rax
|
||||
push %rax /* oend3 */
|
||||
|
||||
movq %op3, %rax
|
||||
push %rax /* oend2 */
|
||||
|
||||
movq %op2, %rax
|
||||
push %rax /* oend1 */
|
||||
|
||||
movq %op1, %rax
|
||||
push %rax /* oend0 */
|
||||
|
||||
/* Scratch space */
|
||||
subq $8, %rsp
|
||||
|
||||
.L_4X2_compute_olimit:
|
||||
/* Computes how many iterations we can do safely
|
||||
* %r15, %rax may be clobbered
|
||||
* rdx must be saved
|
||||
* op[1,2,3,4] & ip0 mustn't be clobbered
|
||||
*/
|
||||
movq %rdx, 0(%rsp)
|
||||
|
||||
/* We can consume up to 7 input bytes each iteration. */
|
||||
movq %ip0, %rax /* rax = ip0 */
|
||||
movq 40(%rsp), %rdx /* rdx = ilowest */
|
||||
subq %rdx, %rax /* rax = ip0 - ilowest */
|
||||
movq %rax, %r15 /* r15 = ip0 - ilowest */
|
||||
|
||||
/* rdx = rax / 7 */
|
||||
movabsq $2635249153387078803, %rdx
|
||||
mulq %rdx
|
||||
subq %rdx, %r15
|
||||
shrq %r15
|
||||
addq %r15, %rdx
|
||||
shrq $2, %rdx
|
||||
|
||||
/* r15 = (ip0 - ilowest) / 7 */
|
||||
movq %rdx, %r15
|
||||
|
||||
/* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
|
||||
movq 8(%rsp), %rax /* rax = oend0 */
|
||||
subq %op0, %rax /* rax = oend0 - op0 */
|
||||
movq 16(%rsp), %rdx /* rdx = oend1 */
|
||||
subq %op1, %rdx /* rdx = oend1 - op1 */
|
||||
|
||||
cmpq %rax, %rdx
|
||||
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
|
||||
|
||||
movq 24(%rsp), %rax /* rax = oend2 */
|
||||
subq %op2, %rax /* rax = oend2 - op2 */
|
||||
|
||||
cmpq %rax, %rdx
|
||||
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
|
||||
|
||||
movq 32(%rsp), %rax /* rax = oend3 */
|
||||
subq %op3, %rax /* rax = oend3 - op3 */
|
||||
|
||||
cmpq %rax, %rdx
|
||||
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
|
||||
|
||||
movabsq $-3689348814741910323, %rax
|
||||
mulq %rdx
|
||||
shrq $3, %rdx /* rdx = rdx / 10 */
|
||||
|
||||
/* r15 = min(%rdx, %r15) */
|
||||
cmpq %rdx, %r15
|
||||
cmova %rdx, %r15
|
||||
|
||||
/* olimit = op3 + 5 * r15 */
|
||||
movq %r15, %rax
|
||||
leaq (%op3, %rax, 4), %olimit
|
||||
addq %rax, %olimit
|
||||
|
||||
movq 0(%rsp), %rdx
|
||||
|
||||
/* If (op3 + 10 > olimit) */
|
||||
movq %op3, %rax /* rax = op3 */
|
||||
cmpq %rax, %olimit /* op3 == olimit */
|
||||
je .L_4X2_exit
|
||||
|
||||
/* If (ip1 < ip0) go to exit */
|
||||
cmpq %ip0, %ip1
|
||||
jb .L_4X2_exit
|
||||
|
||||
/* If (ip2 < ip1) go to exit */
|
||||
cmpq %ip1, %ip2
|
||||
jb .L_4X2_exit
|
||||
|
||||
/* If (ip3 < ip2) go to exit */
|
||||
cmpq %ip2, %ip3
|
||||
jb .L_4X2_exit
|
||||
|
||||
#define DECODE(n, idx) \
|
||||
movq %bits##n, %rax; \
|
||||
shrq $53, %rax; \
|
||||
movzwl 0(%dtable,%rax,4),%r8d; \
|
||||
movzbl 2(%dtable,%rax,4),%r15d; \
|
||||
movzbl 3(%dtable,%rax,4),%eax; \
|
||||
movw %r8w, (%op##n); \
|
||||
shlxq %r15, %bits##n, %bits##n; \
|
||||
addq %rax, %op##n
|
||||
|
||||
#define RELOAD_BITS(n) \
|
||||
bsfq %bits##n, %bits##n; \
|
||||
movq %bits##n, %rax; \
|
||||
shrq $3, %bits##n; \
|
||||
andq $7, %rax; \
|
||||
subq %bits##n, %ip##n; \
|
||||
movq (%ip##n), %bits##n; \
|
||||
orq $1, %bits##n; \
|
||||
shlxq %rax, %bits##n, %bits##n
|
||||
|
||||
|
||||
movq %olimit, 48(%rsp)
|
||||
|
||||
.p2align 6
|
||||
|
||||
.L_4X2_loop_body:
|
||||
/* We clobber r8, so store it on the stack */
|
||||
movq %r8, 0(%rsp)
|
||||
|
||||
/* Decode 5 symbols from each of the 4 streams (20 symbols total). */
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
|
||||
FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)
|
||||
|
||||
/* Reload r8 */
|
||||
movq 0(%rsp), %r8
|
||||
|
||||
FOR_EACH_STREAM(RELOAD_BITS)
|
||||
|
||||
cmp %op3, 48(%rsp)
|
||||
ja .L_4X2_loop_body
|
||||
jmp .L_4X2_compute_olimit
|
||||
|
||||
#undef DECODE
|
||||
#undef RELOAD_BITS
|
||||
.L_4X2_exit:
|
||||
addq $8, %rsp
|
||||
/* Restore stack (oend & olimit) */
|
||||
pop %rax /* oend0 */
|
||||
pop %rax /* oend1 */
|
||||
pop %rax /* oend2 */
|
||||
pop %rax /* oend3 */
|
||||
pop %rax /* ilowest */
|
||||
pop %rax /* olimit */
|
||||
pop %rax /* arg */
|
||||
|
||||
/* Save ip / op / bits */
|
||||
movq %ip0, 0(%rax)
|
||||
movq %ip1, 8(%rax)
|
||||
movq %ip2, 16(%rax)
|
||||
movq %ip3, 24(%rax)
|
||||
movq %op0, 32(%rax)
|
||||
movq %op1, 40(%rax)
|
||||
movq %op2, 48(%rax)
|
||||
movq %op3, 56(%rax)
|
||||
movq %bits0, 64(%rax)
|
||||
movq %bits1, 72(%rax)
|
||||
movq %bits2, 80(%rax)
|
||||
movq %bits3, 88(%rax)
|
||||
|
||||
/* Restore registers */
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
pop %r11
|
||||
pop %r10
|
||||
pop %r9
|
||||
pop %r8
|
||||
pop %rdi
|
||||
pop %rsi
|
||||
pop %rbp
|
||||
pop %rdx
|
||||
pop %rcx
|
||||
pop %rbx
|
||||
pop %rax
|
||||
ret
|
||||
|
||||
#endif
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -18,8 +18,10 @@ extern "C" {
|
|||
/*-****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include <string.h> /* memcpy */
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include "compiler.h" /* __has_builtin */
|
||||
#include "debug.h" /* DEBUG_STATIC_ASSERT */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy */
|
||||
|
||||
|
||||
/*-****************************************
|
||||
|
|
@ -29,104 +31,19 @@ extern "C" {
|
|||
# include <stdlib.h> /* _byteswap_ulong */
|
||||
# include <intrin.h> /* _byteswap_* */
|
||||
#endif
|
||||
#if defined(__GNUC__)
|
||||
# define MEM_STATIC static __inline __attribute__((unused))
|
||||
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
# define MEM_STATIC static inline
|
||||
#elif defined(_MSC_VER)
|
||||
# define MEM_STATIC static __inline
|
||||
#else
|
||||
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
|
||||
#endif
|
||||
|
||||
/* code only tested on 32 and 64 bits systems */
|
||||
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
|
||||
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
#if defined (__has_feature)
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# define MEMORY_SANITIZER 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined (MEMORY_SANITIZER)
|
||||
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
|
||||
#include <stdint.h> /* intptr_t */
|
||||
|
||||
/* Make memory region fully initialized (without changing its contents). */
|
||||
void __msan_unpoison(const volatile void *a, size_t size);
|
||||
|
||||
/* Make memory region fully uninitialized (without changing its contents).
|
||||
This is a legacy interface that does not update origin information. Use
|
||||
__msan_allocated_memory() instead. */
|
||||
void __msan_poison(const volatile void *a, size_t size);
|
||||
|
||||
/* Returns the offset of the first (at least partially) poisoned byte in the
|
||||
memory range, or -1 if the whole range is good. */
|
||||
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
#if defined (__has_feature)
|
||||
# if __has_feature(address_sanitizer)
|
||||
# define ADDRESS_SANITIZER 1
|
||||
# endif
|
||||
#elif defined(__SANITIZE_ADDRESS__)
|
||||
# define ADDRESS_SANITIZER 1
|
||||
#endif
|
||||
|
||||
#if defined (ADDRESS_SANITIZER)
|
||||
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
||||
* We therefore declare the functions we need ourselves, rather than trying to
|
||||
* include the header file... */
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Instrumented
|
||||
* code is forbidden from accessing addresses in this region until it is
|
||||
* unpoisoned. This function is not guaranteed to poison the entire region -
|
||||
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
|
||||
* alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can poison or
|
||||
* unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_poison_memory_region(void const volatile *addr, size_t size);
|
||||
|
||||
/**
|
||||
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
|
||||
*
|
||||
* This memory must be previously allocated by your program. Accessing
|
||||
* addresses in this region is allowed until this region is poisoned again.
|
||||
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
|
||||
* to ASan alignment restrictions.
|
||||
*
|
||||
* \note This function is not thread-safe because no two threads can
|
||||
* poison or unpoison memory in the same memory region simultaneously.
|
||||
*
|
||||
* \param addr Start of memory region.
|
||||
* \param size Size of memory region. */
|
||||
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
||||
#endif
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Basic Types
|
||||
*****************************************************************/
|
||||
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
||||
# include <stdint.h>
|
||||
# if defined(_AIX)
|
||||
# include <inttypes.h>
|
||||
# else
|
||||
# include <stdint.h> /* intptr_t */
|
||||
# endif
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint8_t U8;
|
||||
typedef int8_t S8;
|
||||
typedef uint16_t U16;
|
||||
typedef int16_t S16;
|
||||
typedef uint32_t U32;
|
||||
|
|
@ -139,6 +56,8 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
|||
# error "this implementation requires char to be exactly 8-bit type"
|
||||
#endif
|
||||
typedef unsigned char BYTE;
|
||||
typedef unsigned char U8;
|
||||
typedef signed char S8;
|
||||
#if USHRT_MAX != 65535
|
||||
# error "this implementation requires short to be exactly 16-bit type"
|
||||
#endif
|
||||
|
|
@ -157,25 +76,63 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
|||
|
||||
|
||||
/*-**************************************************************
|
||||
* Memory I/O
|
||||
* Memory I/O API
|
||||
*****************************************************************/
|
||||
/* MEM_FORCE_MEMORY_ACCESS :
|
||||
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
|
||||
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
|
||||
* The below switch allow to select different access method for improved performance.
|
||||
* Method 0 (default) : use `memcpy()`. Safe and portable.
|
||||
* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
|
||||
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
|
||||
/*=== Static platform detection ===*/
|
||||
MEM_STATIC unsigned MEM_32bits(void);
|
||||
MEM_STATIC unsigned MEM_64bits(void);
|
||||
MEM_STATIC unsigned MEM_isLittleEndian(void);
|
||||
|
||||
/*=== Native unaligned read/write ===*/
|
||||
MEM_STATIC U16 MEM_read16(const void* memPtr);
|
||||
MEM_STATIC U32 MEM_read32(const void* memPtr);
|
||||
MEM_STATIC U64 MEM_read64(const void* memPtr);
|
||||
MEM_STATIC size_t MEM_readST(const void* memPtr);
|
||||
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
|
||||
|
||||
/*=== Little endian unaligned read/write ===*/
|
||||
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
|
||||
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
|
||||
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
|
||||
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
|
||||
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
|
||||
|
||||
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
|
||||
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
|
||||
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
|
||||
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
|
||||
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
|
||||
|
||||
/*=== Big endian unaligned read/write ===*/
|
||||
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
|
||||
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
|
||||
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
|
||||
|
||||
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
|
||||
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
|
||||
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
|
||||
|
||||
/*=== Byteswap ===*/
|
||||
MEM_STATIC U32 MEM_swap32(U32 in);
|
||||
MEM_STATIC U64 MEM_swap64(U64 in);
|
||||
MEM_STATIC size_t MEM_swapST(size_t in);
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Memory I/O Implementation
|
||||
*****************************************************************/
|
||||
/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
|
||||
* Method 0 : always use `memcpy()`. Safe and portable.
|
||||
* Method 1 : Use compiler extension to set unaligned access.
|
||||
* Method 2 : direct access. This method is portable but violate C standard.
|
||||
* It can generate buggy code on targets depending on alignment.
|
||||
* In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
|
||||
* See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
|
||||
* Prefer these methods in priority order (0 > 1 > 2)
|
||||
* Default : method 1 if supported, else method 0
|
||||
*/
|
||||
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
||||
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
|
||||
# define MEM_FORCE_MEMORY_ACCESS 2
|
||||
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
|
||||
# ifdef __GNUC__
|
||||
# define MEM_FORCE_MEMORY_ACCESS 1
|
||||
# endif
|
||||
#endif
|
||||
|
|
@ -185,8 +142,22 @@ MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
|
|||
|
||||
MEM_STATIC unsigned MEM_isLittleEndian(void)
|
||||
{
|
||||
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
return 1;
|
||||
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||
return 0;
|
||||
#elif defined(__clang__) && __LITTLE_ENDIAN__
|
||||
return 1;
|
||||
#elif defined(__clang__) && __BIG_ENDIAN__
|
||||
return 0;
|
||||
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
|
||||
return 1;
|
||||
#elif defined(__DMC__) && defined(_M_IX86)
|
||||
return 1;
|
||||
#else
|
||||
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
|
||||
return one.c[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
|
||||
|
|
@ -204,30 +175,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
|
|||
|
||||
#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
|
||||
|
||||
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
|
||||
/* currently only defined for gcc and icc */
|
||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||
__pragma( pack(push, 1) )
|
||||
typedef struct { U16 v; } unalign16;
|
||||
typedef struct { U32 v; } unalign32;
|
||||
typedef struct { U64 v; } unalign64;
|
||||
typedef struct { size_t v; } unalignArch;
|
||||
__pragma( pack(pop) )
|
||||
#else
|
||||
typedef struct { U16 v; } __attribute__((packed)) unalign16;
|
||||
typedef struct { U32 v; } __attribute__((packed)) unalign32;
|
||||
typedef struct { U64 v; } __attribute__((packed)) unalign64;
|
||||
typedef struct { size_t v; } __attribute__((packed)) unalignArch;
|
||||
#endif
|
||||
typedef __attribute__((aligned(1))) U16 unalign16;
|
||||
typedef __attribute__((aligned(1))) U32 unalign32;
|
||||
typedef __attribute__((aligned(1))) U64 unalign64;
|
||||
typedef __attribute__((aligned(1))) size_t unalignArch;
|
||||
|
||||
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
|
||||
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
|
||||
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
|
||||
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
|
||||
MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; }
|
||||
MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; }
|
||||
MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; }
|
||||
MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; }
|
||||
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; }
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; }
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; }
|
||||
|
||||
#else
|
||||
|
||||
|
|
@ -236,41 +196,49 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
|
|||
|
||||
MEM_STATIC U16 MEM_read16(const void* memPtr)
|
||||
{
|
||||
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_read32(const void* memPtr)
|
||||
{
|
||||
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC U64 MEM_read64(const void* memPtr)
|
||||
{
|
||||
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t MEM_readST(const void* memPtr)
|
||||
{
|
||||
size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_write16(void* memPtr, U16 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZSTD_memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_write32(void* memPtr, U32 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZSTD_memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_write64(void* memPtr, U64 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZSTD_memcpy(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
#endif /* MEM_FORCE_MEMORY_ACCESS */
|
||||
|
||||
MEM_STATIC U32 MEM_swap32_fallback(U32 in)
|
||||
{
|
||||
return ((in << 24) & 0xff000000 ) |
|
||||
((in << 8) & 0x00ff0000 ) |
|
||||
((in >> 8) & 0x0000ff00 ) |
|
||||
((in >> 24) & 0x000000ff );
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_swap32(U32 in)
|
||||
{
|
||||
#if defined(_MSC_VER) /* Visual Studio */
|
||||
|
|
@ -279,13 +247,22 @@ MEM_STATIC U32 MEM_swap32(U32 in)
|
|||
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
|
||||
return __builtin_bswap32(in);
|
||||
#else
|
||||
return ((in << 24) & 0xff000000 ) |
|
||||
((in << 8) & 0x00ff0000 ) |
|
||||
((in >> 8) & 0x0000ff00 ) |
|
||||
((in >> 24) & 0x000000ff );
|
||||
return MEM_swap32_fallback(in);
|
||||
#endif
|
||||
}
|
||||
|
||||
MEM_STATIC U64 MEM_swap64_fallback(U64 in)
|
||||
{
|
||||
return ((in << 56) & 0xff00000000000000ULL) |
|
||||
((in << 40) & 0x00ff000000000000ULL) |
|
||||
((in << 24) & 0x0000ff0000000000ULL) |
|
||||
((in << 8) & 0x000000ff00000000ULL) |
|
||||
((in >> 8) & 0x00000000ff000000ULL) |
|
||||
((in >> 24) & 0x0000000000ff0000ULL) |
|
||||
((in >> 40) & 0x000000000000ff00ULL) |
|
||||
((in >> 56) & 0x00000000000000ffULL);
|
||||
}
|
||||
|
||||
MEM_STATIC U64 MEM_swap64(U64 in)
|
||||
{
|
||||
#if defined(_MSC_VER) /* Visual Studio */
|
||||
|
|
@ -294,14 +271,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
|
|||
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
|
||||
return __builtin_bswap64(in);
|
||||
#else
|
||||
return ((in << 56) & 0xff00000000000000ULL) |
|
||||
((in << 40) & 0x00ff000000000000ULL) |
|
||||
((in << 24) & 0x0000ff0000000000ULL) |
|
||||
((in << 8) & 0x000000ff00000000ULL) |
|
||||
((in >> 8) & 0x00000000ff000000ULL) |
|
||||
((in >> 24) & 0x0000000000ff0000ULL) |
|
||||
((in >> 40) & 0x000000000000ff00ULL) |
|
||||
((in >> 56) & 0x00000000000000ffULL);
|
||||
return MEM_swap64_fallback(in);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -338,7 +308,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
|
|||
|
||||
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
|
||||
{
|
||||
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
|
||||
return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
|
||||
}
|
||||
|
||||
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
|
||||
|
|
@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
|
|||
MEM_writeBE64(memPtr, (U64)val);
|
||||
}
|
||||
|
||||
/* code only tested on 32 and 64 bits systems */
|
||||
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -10,9 +10,9 @@
|
|||
|
||||
|
||||
/* ====== Dependencies ======= */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "debug.h" /* assert */
|
||||
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
|
||||
#include "pool.h"
|
||||
|
||||
/* ====== Compiler specifics ====== */
|
||||
|
|
@ -86,7 +86,7 @@ static void* POOL_thread(void* opaque) {
|
|||
{ POOL_job const job = ctx->queue[ctx->queueHead];
|
||||
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
|
||||
ctx->numThreadsBusy++;
|
||||
ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
|
||||
ctx->queueEmpty = (ctx->queueHead == ctx->queueTail);
|
||||
/* Unlock the mutex, signal a pusher, and run the job */
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
|
|
@ -96,33 +96,37 @@ static void* POOL_thread(void* opaque) {
|
|||
/* If the intended queue size was 0, signal after finishing job */
|
||||
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
|
||||
ctx->numThreadsBusy--;
|
||||
if (ctx->queueSize == 1) {
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
}
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
}
|
||||
} /* for (;;) */
|
||||
assert(0); /* Unreachable */
|
||||
}
|
||||
|
||||
/* ZSTD_createThreadPool() : public access point */
|
||||
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
|
||||
return POOL_create (numThreads, 0);
|
||||
}
|
||||
|
||||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
|
||||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
||||
ZSTD_customMem customMem) {
|
||||
ZSTD_customMem customMem)
|
||||
{
|
||||
POOL_ctx* ctx;
|
||||
/* Check parameters */
|
||||
if (!numThreads) { return NULL; }
|
||||
/* Allocate the context and zero initialize */
|
||||
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
|
||||
ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
|
||||
if (!ctx) { return NULL; }
|
||||
/* Initialize the job queue.
|
||||
* It needs one extra space since one space is wasted to differentiate
|
||||
* empty and full queues.
|
||||
*/
|
||||
ctx->queueSize = queueSize + 1;
|
||||
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
|
||||
ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem);
|
||||
ctx->queueHead = 0;
|
||||
ctx->queueTail = 0;
|
||||
ctx->numThreadsBusy = 0;
|
||||
|
|
@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
|||
}
|
||||
ctx->shutdown = 0;
|
||||
/* Allocate space for the thread handles */
|
||||
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
||||
ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
||||
ctx->threadCapacity = 0;
|
||||
ctx->customMem = customMem;
|
||||
/* Check for errors */
|
||||
|
|
@ -169,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) {
|
|||
/* Join all of the threads */
|
||||
{ size_t i;
|
||||
for (i = 0; i < ctx->threadCapacity; ++i) {
|
||||
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
|
||||
ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */
|
||||
} }
|
||||
}
|
||||
|
||||
|
|
@ -179,14 +183,27 @@ void POOL_free(POOL_ctx *ctx) {
|
|||
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
|
||||
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
|
||||
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
|
||||
ZSTD_free(ctx->queue, ctx->customMem);
|
||||
ZSTD_free(ctx->threads, ctx->customMem);
|
||||
ZSTD_free(ctx, ctx->customMem);
|
||||
ZSTD_customFree(ctx->queue, ctx->customMem);
|
||||
ZSTD_customFree(ctx->threads, ctx->customMem);
|
||||
ZSTD_customFree(ctx, ctx->customMem);
|
||||
}
|
||||
|
||||
/*! POOL_joinJobs() :
|
||||
* Waits for all queued jobs to finish executing.
|
||||
*/
|
||||
void POOL_joinJobs(POOL_ctx* ctx) {
|
||||
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
|
||||
while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) {
|
||||
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
}
|
||||
|
||||
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
|
||||
POOL_free (pool);
|
||||
}
|
||||
|
||||
size_t POOL_sizeof(POOL_ctx *ctx) {
|
||||
size_t POOL_sizeof(const POOL_ctx* ctx) {
|
||||
if (ctx==NULL) return 0; /* supports sizeof NULL */
|
||||
return sizeof(*ctx)
|
||||
+ ctx->queueSize * sizeof(POOL_job)
|
||||
|
|
@ -203,11 +220,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
|
|||
return 0;
|
||||
}
|
||||
/* numThreads > threadCapacity */
|
||||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
|
||||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
|
||||
if (!threadPool) return 1;
|
||||
/* replace existing thread pool */
|
||||
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
|
||||
ZSTD_free(ctx->threads, ctx->customMem);
|
||||
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
|
||||
ZSTD_customFree(ctx->threads, ctx->customMem);
|
||||
ctx->threads = threadPool;
|
||||
/* Initialize additional threads */
|
||||
{ size_t threadId;
|
||||
|
|
@ -251,9 +268,12 @@ static int isQueueFull(POOL_ctx const* ctx) {
|
|||
}
|
||||
|
||||
|
||||
static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
|
||||
static void
|
||||
POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
|
||||
{
|
||||
POOL_job const job = {function, opaque};
|
||||
POOL_job job;
|
||||
job.function = function;
|
||||
job.opaque = opaque;
|
||||
assert(ctx != NULL);
|
||||
if (ctx->shutdown) return;
|
||||
|
||||
|
|
@ -301,21 +321,28 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
|
|||
struct POOL_ctx_s {
|
||||
int dummy;
|
||||
};
|
||||
static POOL_ctx g_ctx;
|
||||
static POOL_ctx g_poolCtx;
|
||||
|
||||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
|
||||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
|
||||
}
|
||||
|
||||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
|
||||
POOL_ctx*
|
||||
POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem)
|
||||
{
|
||||
(void)numThreads;
|
||||
(void)queueSize;
|
||||
(void)customMem;
|
||||
return &g_ctx;
|
||||
return &g_poolCtx;
|
||||
}
|
||||
|
||||
void POOL_free(POOL_ctx* ctx) {
|
||||
assert(!ctx || ctx == &g_ctx);
|
||||
assert(!ctx || ctx == &g_poolCtx);
|
||||
(void)ctx;
|
||||
}
|
||||
|
||||
void POOL_joinJobs(POOL_ctx* ctx){
|
||||
assert(!ctx || ctx == &g_poolCtx);
|
||||
(void)ctx;
|
||||
}
|
||||
|
||||
|
|
@ -335,9 +362,9 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
size_t POOL_sizeof(POOL_ctx* ctx) {
|
||||
size_t POOL_sizeof(const POOL_ctx* ctx) {
|
||||
if (ctx==NULL) return 0; /* supports sizeof NULL */
|
||||
assert(ctx == &g_ctx);
|
||||
assert(ctx == &g_poolCtx);
|
||||
return sizeof(*ctx);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -16,7 +16,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
|
||||
#include "zstd.h"
|
||||
|
||||
|
|
@ -38,10 +38,16 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
|||
*/
|
||||
void POOL_free(POOL_ctx* ctx);
|
||||
|
||||
|
||||
/*! POOL_joinJobs() :
|
||||
* Waits for all queued jobs to finish executing.
|
||||
*/
|
||||
void POOL_joinJobs(POOL_ctx* ctx);
|
||||
|
||||
/*! POOL_resize() :
|
||||
* Expands or shrinks pool's number of threads.
|
||||
* This is more efficient than releasing + creating a new context,
|
||||
* since it tries to preserve and re-use existing threads.
|
||||
* since it tries to preserve and reuse existing threads.
|
||||
* `numThreads` must be at least 1.
|
||||
* @return : 0 when resize was successful,
|
||||
* !0 (typically 1) if there is an error.
|
||||
|
|
@ -53,7 +59,7 @@ int POOL_resize(POOL_ctx* ctx, size_t numThreads);
|
|||
* @return threadpool memory usage
|
||||
* note : compatible with NULL (returns 0 in this case)
|
||||
*/
|
||||
size_t POOL_sizeof(POOL_ctx* ctx);
|
||||
size_t POOL_sizeof(const POOL_ctx* ctx);
|
||||
|
||||
/*! POOL_function :
|
||||
* The function type that can be added to a thread pool.
|
||||
|
|
@ -70,7 +76,7 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
|
|||
|
||||
|
||||
/*! POOL_tryAdd() :
|
||||
* Add the job `function(opaque)` to thread pool _if_ a worker is available.
|
||||
* Add the job `function(opaque)` to thread pool _if_ a queue slot is available.
|
||||
* Returns immediately even if not (does not block).
|
||||
* @return : 1 if successful, 0 if not.
|
||||
*/
|
||||
|
|
|
|||
158
uppsrc/plugin/zstd/lib/portability_macros.h
Normal file
158
uppsrc/plugin/zstd/lib/portability_macros.h
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_PORTABILITY_MACROS_H
|
||||
#define ZSTD_PORTABILITY_MACROS_H
|
||||
|
||||
/**
|
||||
* This header file contains macro definitions to support portability.
|
||||
* This header is shared between C and ASM code, so it MUST only
|
||||
* contain macro definitions. It MUST not contain any C code.
|
||||
*
|
||||
* This header ONLY defines macros to detect platforms/feature support.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_feature
|
||||
# define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
#ifndef ZSTD_MEMORY_SANITIZER
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# define ZSTD_MEMORY_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_MEMORY_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
#ifndef ZSTD_ADDRESS_SANITIZER
|
||||
# if __has_feature(address_sanitizer)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# elif defined(__SANITIZE_ADDRESS__)
|
||||
# define ZSTD_ADDRESS_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_ADDRESS_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under dfsan */
|
||||
#ifndef ZSTD_DATAFLOW_SANITIZER
|
||||
# if __has_feature(dataflow_sanitizer)
|
||||
# define ZSTD_DATAFLOW_SANITIZER 1
|
||||
# else
|
||||
# define ZSTD_DATAFLOW_SANITIZER 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Mark the internal assembly functions as hidden */
|
||||
#ifdef __ELF__
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
|
||||
#elif defined(__APPLE__)
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
|
||||
#else
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func)
|
||||
#endif
|
||||
|
||||
/* Enable runtime BMI2 dispatch based on the CPU.
|
||||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
||||
*/
|
||||
#ifndef DYNAMIC_BMI2
|
||||
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
||||
|| (defined(__GNUC__) \
|
||||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
||||
&& (defined(__x86_64__) || defined(_M_X64)) \
|
||||
&& !defined(__BMI2__)
|
||||
# define DYNAMIC_BMI2 1
|
||||
#else
|
||||
# define DYNAMIC_BMI2 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Only enable assembly for GNUC compatible compilers,
|
||||
* because other platforms may not support GAS assembly syntax.
|
||||
*
|
||||
* Only enable assembly for Linux / MacOS, other platforms may
|
||||
* work, but they haven't been tested. This could likely be
|
||||
* extended to BSD systems.
|
||||
*
|
||||
* Disable assembly when MSAN is enabled, because MSAN requires
|
||||
* 100% of code to be instrumented to work.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
|
||||
# if ZSTD_MEMORY_SANITIZER
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
# elif ZSTD_DATAFLOW_SANITIZER
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
# else
|
||||
# define ZSTD_ASM_SUPPORTED 1
|
||||
# endif
|
||||
# else
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
# endif
|
||||
#else
|
||||
# define ZSTD_ASM_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines whether we should enable assembly for x86-64
|
||||
* with BMI2.
|
||||
*
|
||||
* Enable if all of the following conditions hold:
|
||||
* - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
|
||||
* - Assembly is supported
|
||||
* - We are compiling for x86-64 and either:
|
||||
* - DYNAMIC_BMI2 is enabled
|
||||
* - BMI2 is supported at compile time
|
||||
*/
|
||||
#if !defined(ZSTD_DISABLE_ASM) && \
|
||||
ZSTD_ASM_SUPPORTED && \
|
||||
defined(__x86_64__) && \
|
||||
(DYNAMIC_BMI2 || defined(__BMI2__))
|
||||
# define ZSTD_ENABLE_ASM_X86_64_BMI2 1
|
||||
#else
|
||||
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For x86 ELF targets, add .note.gnu.property section for Intel CET in
|
||||
* assembly sources when CET is enabled.
|
||||
*
|
||||
* Additionally, any function that may be called indirectly must begin
|
||||
* with ZSTD_CET_ENDBRANCH.
|
||||
*/
|
||||
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
|
||||
&& defined(__has_include)
|
||||
# if __has_include(<cet.h>)
|
||||
# include <cet.h>
|
||||
# define ZSTD_CET_ENDBRANCH _CET_ENDBR
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_CET_ENDBRANCH
|
||||
# define ZSTD_CET_ENDBRANCH
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_PORTABILITY_MACROS_H */
|
||||
|
|
@ -23,8 +23,7 @@ int g_ZSTD_threading_useless_symbol;
|
|||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
|
||||
|
||||
/**
|
||||
* Windows minimalist Pthread Wrapper, based on :
|
||||
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
|
||||
* Windows minimalist Pthread Wrapper
|
||||
*/
|
||||
|
||||
|
||||
|
|
@ -35,37 +34,94 @@ int g_ZSTD_threading_useless_symbol;
|
|||
|
||||
/* === Implementation === */
|
||||
|
||||
typedef struct {
|
||||
void* (*start_routine)(void*);
|
||||
void* arg;
|
||||
int initialized;
|
||||
ZSTD_pthread_cond_t initialized_cond;
|
||||
ZSTD_pthread_mutex_t initialized_mutex;
|
||||
} ZSTD_thread_params_t;
|
||||
|
||||
static unsigned __stdcall worker(void *arg)
|
||||
{
|
||||
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
|
||||
thread->arg = thread->start_routine(thread->arg);
|
||||
void* (*start_routine)(void*);
|
||||
void* thread_arg;
|
||||
|
||||
/* Initialized thread_arg and start_routine and signal main thread that we don't need it
|
||||
* to wait any longer.
|
||||
*/
|
||||
{
|
||||
ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg;
|
||||
thread_arg = thread_param->arg;
|
||||
start_routine = thread_param->start_routine;
|
||||
|
||||
/* Signal main thread that we are running and do not depend on its memory anymore */
|
||||
ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex);
|
||||
thread_param->initialized = 1;
|
||||
ZSTD_pthread_cond_signal(&thread_param->initialized_cond);
|
||||
ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex);
|
||||
}
|
||||
|
||||
start_routine(thread_arg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
|
||||
void* (*start_routine) (void*), void* arg)
|
||||
{
|
||||
ZSTD_thread_params_t thread_param;
|
||||
(void)unused;
|
||||
thread->arg = arg;
|
||||
thread->start_routine = start_routine;
|
||||
thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
|
||||
|
||||
if (!thread->handle)
|
||||
if (thread==NULL) return -1;
|
||||
*thread = NULL;
|
||||
|
||||
thread_param.start_routine = start_routine;
|
||||
thread_param.arg = arg;
|
||||
thread_param.initialized = 0;
|
||||
|
||||
/* Setup thread initialization synchronization */
|
||||
if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
|
||||
/* Should never happen on Windows */
|
||||
return -1;
|
||||
}
|
||||
if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) {
|
||||
/* Should never happen on Windows */
|
||||
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Spawn thread */
|
||||
*thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
|
||||
if (*thread==NULL) {
|
||||
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
|
||||
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
|
||||
return errno;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Wait for thread to be initialized */
|
||||
ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex);
|
||||
while(!thread_param.initialized) {
|
||||
ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex);
|
||||
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
|
||||
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread)
|
||||
{
|
||||
DWORD result;
|
||||
|
||||
if (!thread.handle) return 0;
|
||||
if (!thread) return 0;
|
||||
|
||||
result = WaitForSingleObject(thread, INFINITE);
|
||||
CloseHandle(thread);
|
||||
|
||||
result = WaitForSingleObject(thread.handle, INFINITE);
|
||||
switch (result) {
|
||||
case WAIT_OBJECT_0:
|
||||
if (value_ptr) *value_ptr = thread.arg;
|
||||
return 0;
|
||||
case WAIT_ABANDONED:
|
||||
return EINVAL;
|
||||
|
|
@ -78,11 +134,13 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
|
|||
|
||||
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
|
||||
|
||||
#include <stdlib.h>
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "zstd_deps.h"
|
||||
|
||||
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
|
||||
{
|
||||
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
|
||||
assert(mutex != NULL);
|
||||
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
|
||||
if (!*mutex)
|
||||
return 1;
|
||||
return pthread_mutex_init(*mutex, attr);
|
||||
|
|
@ -90,18 +148,20 @@ int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t con
|
|||
|
||||
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
|
||||
{
|
||||
assert(mutex != NULL);
|
||||
if (!*mutex)
|
||||
return 0;
|
||||
{
|
||||
int const ret = pthread_mutex_destroy(*mutex);
|
||||
free(*mutex);
|
||||
ZSTD_free(*mutex);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
|
||||
{
|
||||
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
|
||||
assert(cond != NULL);
|
||||
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
|
||||
if (!*cond)
|
||||
return 1;
|
||||
return pthread_cond_init(*cond, attr);
|
||||
|
|
@ -109,11 +169,12 @@ int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const*
|
|||
|
||||
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
|
||||
{
|
||||
assert(cond != NULL);
|
||||
if (!*cond)
|
||||
return 0;
|
||||
{
|
||||
int const ret = pthread_cond_destroy(*cond);
|
||||
free(*cond);
|
||||
ZSTD_free(*cond);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,8 +23,7 @@ extern "C" {
|
|||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
|
||||
|
||||
/**
|
||||
* Windows minimalist Pthread Wrapper, based on :
|
||||
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
|
||||
* Windows minimalist Pthread Wrapper
|
||||
*/
|
||||
#ifdef WINVER
|
||||
# undef WINVER
|
||||
|
|
@ -62,16 +61,12 @@ extern "C" {
|
|||
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
|
||||
|
||||
/* ZSTD_pthread_create() and ZSTD_pthread_join() */
|
||||
typedef struct {
|
||||
HANDLE handle;
|
||||
void* (*start_routine)(void*);
|
||||
void* arg;
|
||||
} ZSTD_pthread_t;
|
||||
typedef HANDLE ZSTD_pthread_t;
|
||||
|
||||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
|
||||
void* (*start_routine) (void*), void* arg);
|
||||
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
||||
int ZSTD_pthread_join(ZSTD_pthread_t thread);
|
||||
|
||||
/**
|
||||
* add here more wrappers as required
|
||||
|
|
@ -99,7 +94,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
|||
|
||||
#define ZSTD_pthread_t pthread_t
|
||||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
|
||||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
|
||||
#define ZSTD_pthread_join(a) pthread_join((a),NULL)
|
||||
|
||||
#else /* DEBUGLEVEL >= 1 */
|
||||
|
||||
|
|
@ -124,7 +119,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
|
|||
|
||||
#define ZSTD_pthread_t pthread_t
|
||||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
|
||||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
|
||||
#define ZSTD_pthread_join(a) pthread_join((a),NULL)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -1,864 +1,18 @@
|
|||
/*
|
||||
* xxHash - Fast Hash algorithm
|
||||
* Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
|
||||
* xxHash - Extremely Fast Hash algorithm
|
||||
* Copyright (c) Yann Collet - Meta Platforms, Inc
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - xxHash homepage: http://www.xxhash.com
|
||||
* - xxHash source repository : https://github.com/Cyan4973/xxHash
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Tuning parameters
|
||||
***************************************/
|
||||
/*!XXH_FORCE_MEMORY_ACCESS :
|
||||
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
|
||||
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
|
||||
* The below switch allow to select different access method for improved performance.
|
||||
* Method 0 (default) : use `memcpy()`. Safe and portable.
|
||||
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
|
||||
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
|
||||
* Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
|
||||
* It can generate buggy code on targets which do not support unaligned memory accesses.
|
||||
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
|
||||
* See http://stackoverflow.com/a/32095106/646947 for details.
|
||||
* Prefer these methods in priority order (0 > 1 > 2)
|
||||
*/
|
||||
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
||||
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
|
||||
# define XXH_FORCE_MEMORY_ACCESS 2
|
||||
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
|
||||
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
|
||||
defined(__ICCARM__)
|
||||
# define XXH_FORCE_MEMORY_ACCESS 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*!XXH_ACCEPT_NULL_INPUT_POINTER :
|
||||
* If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
|
||||
* When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
|
||||
* By default, this option is disabled. To enable it, uncomment below define :
|
||||
/*
|
||||
* xxhash.c instantiates functions defined in xxhash.h
|
||||
*/
|
||||
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
|
||||
|
||||
/*!XXH_FORCE_NATIVE_FORMAT :
|
||||
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
|
||||
* Results are therefore identical for little-endian and big-endian CPU.
|
||||
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
||||
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
|
||||
* to improve speed for Big-endian CPU.
|
||||
* This option has no impact on Little_Endian CPU.
|
||||
*/
|
||||
#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
|
||||
# define XXH_FORCE_NATIVE_FORMAT 0
|
||||
#endif
|
||||
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||
#define XXH_IMPLEMENTATION /* access definitions */
|
||||
|
||||
/*!XXH_FORCE_ALIGN_CHECK :
|
||||
* This is a minor performance trick, only useful with lots of very small keys.
|
||||
* It means : check for aligned/unaligned input.
|
||||
* The check costs one initial branch per hash; set to 0 when the input data
|
||||
* is guaranteed to be aligned.
|
||||
*/
|
||||
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
|
||||
# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
|
||||
# define XXH_FORCE_ALIGN_CHECK 0
|
||||
# else
|
||||
# define XXH_FORCE_ALIGN_CHECK 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Includes & Memory related functions
|
||||
***************************************/
|
||||
/* Modify the local functions below should you wish to use some other memory routines */
|
||||
/* for malloc(), free() */
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h> /* size_t */
|
||||
static void* XXH_malloc(size_t s) { return malloc(s); }
|
||||
static void XXH_free (void* p) { free(p); }
|
||||
/* for memcpy() */
|
||||
#include <string.h>
|
||||
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
|
||||
|
||||
#ifndef XXH_STATIC_LINKING_ONLY
|
||||
# define XXH_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "xxhash.h"
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Compiler Specific Options
|
||||
***************************************/
|
||||
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
||||
# define INLINE_KEYWORD inline
|
||||
#else
|
||||
# define INLINE_KEYWORD
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__ICCARM__)
|
||||
# define FORCE_INLINE_ATTR __attribute__((always_inline))
|
||||
#elif defined(_MSC_VER)
|
||||
# define FORCE_INLINE_ATTR __forceinline
|
||||
#else
|
||||
# define FORCE_INLINE_ATTR
|
||||
#endif
|
||||
|
||||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Basic Types
|
||||
***************************************/
|
||||
#ifndef MEM_MODULE
|
||||
# define MEM_MODULE
|
||||
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
||||
# include <stdint.h>
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint16_t U16;
|
||||
typedef uint32_t U32;
|
||||
typedef int32_t S32;
|
||||
typedef uint64_t U64;
|
||||
# else
|
||||
typedef unsigned char BYTE;
|
||||
typedef unsigned short U16;
|
||||
typedef unsigned int U32;
|
||||
typedef signed int S32;
|
||||
typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
|
||||
|
||||
/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
|
||||
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
|
||||
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
|
||||
|
||||
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
|
||||
|
||||
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
|
||||
/* currently only defined for gcc and icc */
|
||||
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
|
||||
|
||||
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
|
||||
static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
|
||||
|
||||
#else
|
||||
|
||||
/* portable and safe solution. Generally efficient.
|
||||
* see : http://stackoverflow.com/a/32095106/646947
|
||||
*/
|
||||
|
||||
static U32 XXH_read32(const void* memPtr)
|
||||
{
|
||||
U32 val;
|
||||
memcpy(&val, memPtr, sizeof(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static U64 XXH_read64(const void* memPtr)
|
||||
{
|
||||
U64 val;
|
||||
memcpy(&val, memPtr, sizeof(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
|
||||
|
||||
|
||||
/* ****************************************
|
||||
* Compiler-specific Functions and Macros
|
||||
******************************************/
|
||||
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
|
||||
/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
|
||||
#if defined(_MSC_VER)
|
||||
# define XXH_rotl32(x,r) _rotl(x,r)
|
||||
# define XXH_rotl64(x,r) _rotl64(x,r)
|
||||
#else
|
||||
#if defined(__ICCARM__)
|
||||
# include <intrinsics.h>
|
||||
# define XXH_rotl32(x,r) __ROR(x,(32 - r))
|
||||
#else
|
||||
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
#endif
|
||||
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) /* Visual Studio */
|
||||
# define XXH_swap32 _byteswap_ulong
|
||||
# define XXH_swap64 _byteswap_uint64
|
||||
#elif GCC_VERSION >= 403
|
||||
# define XXH_swap32 __builtin_bswap32
|
||||
# define XXH_swap64 __builtin_bswap64
|
||||
#else
|
||||
static U32 XXH_swap32 (U32 x)
|
||||
{
|
||||
return ((x << 24) & 0xff000000 ) |
|
||||
((x << 8) & 0x00ff0000 ) |
|
||||
((x >> 8) & 0x0000ff00 ) |
|
||||
((x >> 24) & 0x000000ff );
|
||||
}
|
||||
static U64 XXH_swap64 (U64 x)
|
||||
{
|
||||
return ((x << 56) & 0xff00000000000000ULL) |
|
||||
((x << 40) & 0x00ff000000000000ULL) |
|
||||
((x << 24) & 0x0000ff0000000000ULL) |
|
||||
((x << 8) & 0x000000ff00000000ULL) |
|
||||
((x >> 8) & 0x00000000ff000000ULL) |
|
||||
((x >> 24) & 0x0000000000ff0000ULL) |
|
||||
((x >> 40) & 0x000000000000ff00ULL) |
|
||||
((x >> 56) & 0x00000000000000ffULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Architecture Macros
|
||||
***************************************/
|
||||
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
|
||||
|
||||
/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
|
||||
#ifndef XXH_CPU_LITTLE_ENDIAN
|
||||
static const int g_one = 1;
|
||||
# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
|
||||
#endif
|
||||
|
||||
|
||||
/* ***************************
|
||||
* Memory reads
|
||||
*****************************/
|
||||
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
if (align==XXH_unaligned)
|
||||
return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
|
||||
else
|
||||
return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
|
||||
{
|
||||
return XXH_readLE32_align(ptr, endian, XXH_unaligned);
|
||||
}
|
||||
|
||||
static U32 XXH_readBE32(const void* ptr)
|
||||
{
|
||||
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
if (align==XXH_unaligned)
|
||||
return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
|
||||
else
|
||||
return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
|
||||
{
|
||||
return XXH_readLE64_align(ptr, endian, XXH_unaligned);
|
||||
}
|
||||
|
||||
static U64 XXH_readBE64(const void* ptr)
|
||||
{
|
||||
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
|
||||
}
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Macros
|
||||
***************************************/
|
||||
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const U32 PRIME32_1 = 2654435761U;
|
||||
static const U32 PRIME32_2 = 2246822519U;
|
||||
static const U32 PRIME32_3 = 3266489917U;
|
||||
static const U32 PRIME32_4 = 668265263U;
|
||||
static const U32 PRIME32_5 = 374761393U;
|
||||
|
||||
static const U64 PRIME64_1 = 11400714785074694791ULL;
|
||||
static const U64 PRIME64_2 = 14029467366897019727ULL;
|
||||
static const U64 PRIME64_3 = 1609587929392839161ULL;
|
||||
static const U64 PRIME64_4 = 9650029242287828579ULL;
|
||||
static const U64 PRIME64_5 = 2870177450012600261ULL;
|
||||
|
||||
XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
|
||||
|
||||
|
||||
/* **************************
|
||||
* Utils
|
||||
****************************/
|
||||
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
|
||||
{
|
||||
memcpy(dstState, srcState, sizeof(*dstState));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
|
||||
{
|
||||
memcpy(dstState, srcState, sizeof(*dstState));
|
||||
}
|
||||
|
||||
|
||||
/* ***************************
|
||||
* Simple Hash Functions
|
||||
*****************************/
|
||||
|
||||
static U32 XXH32_round(U32 seed, U32 input)
|
||||
{
|
||||
seed += input * PRIME32_2;
|
||||
seed = XXH_rotl32(seed, 13);
|
||||
seed *= PRIME32_1;
|
||||
return seed;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* bEnd = p + len;
|
||||
U32 h32;
|
||||
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (p==NULL) {
|
||||
len=0;
|
||||
bEnd=p=(const BYTE*)(size_t)16;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (len>=16) {
|
||||
const BYTE* const limit = bEnd - 16;
|
||||
U32 v1 = seed + PRIME32_1 + PRIME32_2;
|
||||
U32 v2 = seed + PRIME32_2;
|
||||
U32 v3 = seed + 0;
|
||||
U32 v4 = seed - PRIME32_1;
|
||||
|
||||
do {
|
||||
v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
|
||||
v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
|
||||
v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
|
||||
v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
|
||||
} while (p<=limit);
|
||||
|
||||
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
|
||||
} else {
|
||||
h32 = seed + PRIME32_5;
|
||||
}
|
||||
|
||||
h32 += (U32) len;
|
||||
|
||||
while (p+4<=bEnd) {
|
||||
h32 += XXH_get32bits(p) * PRIME32_3;
|
||||
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h32 += (*p) * PRIME32_5;
|
||||
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
|
||||
p++;
|
||||
}
|
||||
|
||||
h32 ^= h32 >> 15;
|
||||
h32 *= PRIME32_2;
|
||||
h32 ^= h32 >> 13;
|
||||
h32 *= PRIME32_3;
|
||||
h32 ^= h32 >> 16;
|
||||
|
||||
return h32;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
|
||||
{
|
||||
#if 0
|
||||
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
||||
XXH32_CREATESTATE_STATIC(state);
|
||||
XXH32_reset(state, seed);
|
||||
XXH32_update(state, input, len);
|
||||
return XXH32_digest(state);
|
||||
#else
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if (XXH_FORCE_ALIGN_CHECK) {
|
||||
if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
|
||||
else
|
||||
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
|
||||
} }
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
|
||||
else
|
||||
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static U64 XXH64_round(U64 acc, U64 input)
|
||||
{
|
||||
acc += input * PRIME64_2;
|
||||
acc = XXH_rotl64(acc, 31);
|
||||
acc *= PRIME64_1;
|
||||
return acc;
|
||||
}
|
||||
|
||||
static U64 XXH64_mergeRound(U64 acc, U64 val)
|
||||
{
|
||||
val = XXH64_round(0, val);
|
||||
acc ^= val;
|
||||
acc = acc * PRIME64_1 + PRIME64_4;
|
||||
return acc;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* const bEnd = p + len;
|
||||
U64 h64;
|
||||
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (p==NULL) {
|
||||
len=0;
|
||||
bEnd=p=(const BYTE*)(size_t)32;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (len>=32) {
|
||||
const BYTE* const limit = bEnd - 32;
|
||||
U64 v1 = seed + PRIME64_1 + PRIME64_2;
|
||||
U64 v2 = seed + PRIME64_2;
|
||||
U64 v3 = seed + 0;
|
||||
U64 v4 = seed - PRIME64_1;
|
||||
|
||||
do {
|
||||
v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
|
||||
v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
|
||||
v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
|
||||
v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
|
||||
} while (p<=limit);
|
||||
|
||||
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
||||
h64 = XXH64_mergeRound(h64, v1);
|
||||
h64 = XXH64_mergeRound(h64, v2);
|
||||
h64 = XXH64_mergeRound(h64, v3);
|
||||
h64 = XXH64_mergeRound(h64, v4);
|
||||
|
||||
} else {
|
||||
h64 = seed + PRIME64_5;
|
||||
}
|
||||
|
||||
h64 += (U64) len;
|
||||
|
||||
while (p+8<=bEnd) {
|
||||
U64 const k1 = XXH64_round(0, XXH_get64bits(p));
|
||||
h64 ^= k1;
|
||||
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
|
||||
p+=8;
|
||||
}
|
||||
|
||||
if (p+4<=bEnd) {
|
||||
h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
|
||||
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h64 ^= (*p) * PRIME64_5;
|
||||
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
|
||||
p++;
|
||||
}
|
||||
|
||||
h64 ^= h64 >> 33;
|
||||
h64 *= PRIME64_2;
|
||||
h64 ^= h64 >> 29;
|
||||
h64 *= PRIME64_3;
|
||||
h64 ^= h64 >> 32;
|
||||
|
||||
return h64;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
|
||||
{
|
||||
#if 0
|
||||
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
||||
XXH64_CREATESTATE_STATIC(state);
|
||||
XXH64_reset(state, seed);
|
||||
XXH64_update(state, input, len);
|
||||
return XXH64_digest(state);
|
||||
#else
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if (XXH_FORCE_ALIGN_CHECK) {
|
||||
if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
|
||||
else
|
||||
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
|
||||
} }
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
|
||||
else
|
||||
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* **************************************************
|
||||
* Advanced Hash Functions
|
||||
****************************************************/
|
||||
|
||||
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
|
||||
{
|
||||
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
|
||||
}
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
|
||||
{
|
||||
XXH_free(statePtr);
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
|
||||
{
|
||||
return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
|
||||
}
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
|
||||
{
|
||||
XXH_free(statePtr);
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
|
||||
/*** Hash feed ***/
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
|
||||
{
|
||||
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
|
||||
memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
|
||||
state.v1 = seed + PRIME32_1 + PRIME32_2;
|
||||
state.v2 = seed + PRIME32_2;
|
||||
state.v3 = seed + 0;
|
||||
state.v4 = seed - PRIME32_1;
|
||||
memcpy(statePtr, &state, sizeof(state));
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
|
||||
{
|
||||
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
|
||||
memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
|
||||
state.v1 = seed + PRIME64_1 + PRIME64_2;
|
||||
state.v2 = seed + PRIME64_2;
|
||||
state.v3 = seed + 0;
|
||||
state.v4 = seed - PRIME64_1;
|
||||
memcpy(statePtr, &state, sizeof(state));
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* const bEnd = p + len;
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (input==NULL) return XXH_ERROR;
|
||||
#endif
|
||||
|
||||
state->total_len_32 += (unsigned)len;
|
||||
state->large_len |= (len>=16) | (state->total_len_32>=16);
|
||||
|
||||
if (state->memsize + len < 16) { /* fill in tmp buffer */
|
||||
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
|
||||
state->memsize += (unsigned)len;
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
if (state->memsize) { /* some data left from previous update */
|
||||
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
|
||||
{ const U32* p32 = state->mem32;
|
||||
state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
|
||||
state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
|
||||
state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
|
||||
state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
|
||||
}
|
||||
p += 16-state->memsize;
|
||||
state->memsize = 0;
|
||||
}
|
||||
|
||||
if (p <= bEnd-16) {
|
||||
const BYTE* const limit = bEnd - 16;
|
||||
U32 v1 = state->v1;
|
||||
U32 v2 = state->v2;
|
||||
U32 v3 = state->v3;
|
||||
U32 v4 = state->v4;
|
||||
|
||||
do {
|
||||
v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
|
||||
v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
|
||||
v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
|
||||
v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
|
||||
} while (p<=limit);
|
||||
|
||||
state->v1 = v1;
|
||||
state->v2 = v2;
|
||||
state->v3 = v3;
|
||||
state->v4 = v4;
|
||||
}
|
||||
|
||||
if (p < bEnd) {
|
||||
XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
|
||||
state->memsize = (unsigned)(bEnd-p);
|
||||
}
|
||||
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
|
||||
else
|
||||
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
|
||||
{
|
||||
const BYTE * p = (const BYTE*)state->mem32;
|
||||
const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
|
||||
U32 h32;
|
||||
|
||||
if (state->large_len) {
|
||||
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
|
||||
} else {
|
||||
h32 = state->v3 /* == seed */ + PRIME32_5;
|
||||
}
|
||||
|
||||
h32 += state->total_len_32;
|
||||
|
||||
while (p+4<=bEnd) {
|
||||
h32 += XXH_readLE32(p, endian) * PRIME32_3;
|
||||
h32 = XXH_rotl32(h32, 17) * PRIME32_4;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h32 += (*p) * PRIME32_5;
|
||||
h32 = XXH_rotl32(h32, 11) * PRIME32_1;
|
||||
p++;
|
||||
}
|
||||
|
||||
h32 ^= h32 >> 15;
|
||||
h32 *= PRIME32_2;
|
||||
h32 ^= h32 >> 13;
|
||||
h32 *= PRIME32_3;
|
||||
h32 ^= h32 >> 16;
|
||||
|
||||
return h32;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH32_digest_endian(state_in, XXH_littleEndian);
|
||||
else
|
||||
return XXH32_digest_endian(state_in, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* **** XXH64 **** */
|
||||
|
||||
FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
|
||||
{
|
||||
const BYTE* p = (const BYTE*)input;
|
||||
const BYTE* const bEnd = p + len;
|
||||
|
||||
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
|
||||
if (input==NULL) return XXH_ERROR;
|
||||
#endif
|
||||
|
||||
state->total_len += len;
|
||||
|
||||
if (state->memsize + len < 32) { /* fill in tmp buffer */
|
||||
if (input != NULL) {
|
||||
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
|
||||
}
|
||||
state->memsize += (U32)len;
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
if (state->memsize) { /* tmp buffer is full */
|
||||
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
|
||||
state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
|
||||
state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
|
||||
state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
|
||||
state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
|
||||
p += 32-state->memsize;
|
||||
state->memsize = 0;
|
||||
}
|
||||
|
||||
if (p+32 <= bEnd) {
|
||||
const BYTE* const limit = bEnd - 32;
|
||||
U64 v1 = state->v1;
|
||||
U64 v2 = state->v2;
|
||||
U64 v3 = state->v3;
|
||||
U64 v4 = state->v4;
|
||||
|
||||
do {
|
||||
v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
|
||||
v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
|
||||
v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
|
||||
v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
|
||||
} while (p<=limit);
|
||||
|
||||
state->v1 = v1;
|
||||
state->v2 = v2;
|
||||
state->v3 = v3;
|
||||
state->v4 = v4;
|
||||
}
|
||||
|
||||
if (p < bEnd) {
|
||||
XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
|
||||
state->memsize = (unsigned)(bEnd-p);
|
||||
}
|
||||
|
||||
return XXH_OK;
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
|
||||
else
|
||||
return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
|
||||
{
|
||||
const BYTE * p = (const BYTE*)state->mem64;
|
||||
const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
|
||||
U64 h64;
|
||||
|
||||
if (state->total_len >= 32) {
|
||||
U64 const v1 = state->v1;
|
||||
U64 const v2 = state->v2;
|
||||
U64 const v3 = state->v3;
|
||||
U64 const v4 = state->v4;
|
||||
|
||||
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
||||
h64 = XXH64_mergeRound(h64, v1);
|
||||
h64 = XXH64_mergeRound(h64, v2);
|
||||
h64 = XXH64_mergeRound(h64, v3);
|
||||
h64 = XXH64_mergeRound(h64, v4);
|
||||
} else {
|
||||
h64 = state->v3 + PRIME64_5;
|
||||
}
|
||||
|
||||
h64 += (U64) state->total_len;
|
||||
|
||||
while (p+8<=bEnd) {
|
||||
U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
|
||||
h64 ^= k1;
|
||||
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
|
||||
p+=8;
|
||||
}
|
||||
|
||||
if (p+4<=bEnd) {
|
||||
h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
|
||||
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
|
||||
p+=4;
|
||||
}
|
||||
|
||||
while (p<bEnd) {
|
||||
h64 ^= (*p) * PRIME64_5;
|
||||
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
|
||||
p++;
|
||||
}
|
||||
|
||||
h64 ^= h64 >> 33;
|
||||
h64 *= PRIME64_2;
|
||||
h64 ^= h64 >> 29;
|
||||
h64 *= PRIME64_3;
|
||||
h64 ^= h64 >> 32;
|
||||
|
||||
return h64;
|
||||
}
|
||||
|
||||
|
||||
XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
|
||||
{
|
||||
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
|
||||
|
||||
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
|
||||
return XXH64_digest_endian(state_in, XXH_littleEndian);
|
||||
else
|
||||
return XXH64_digest_endian(state_in, XXH_bigEndian);
|
||||
}
|
||||
|
||||
|
||||
/* **************************
|
||||
* Canonical representation
|
||||
****************************/
|
||||
|
||||
/*! Default XXH result types are basic unsigned 32 and 64 bits.
|
||||
* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
|
||||
* These functions allow transformation of hash result into and from its canonical format.
|
||||
* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
|
||||
*/
|
||||
|
||||
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
|
||||
{
|
||||
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
|
||||
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
|
||||
memcpy(dst, &hash, sizeof(*dst));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
|
||||
{
|
||||
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
|
||||
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
|
||||
memcpy(dst, &hash, sizeof(*dst));
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
|
||||
{
|
||||
return XXH_readBE32(src);
|
||||
}
|
||||
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
|
||||
{
|
||||
return XXH_readBE64(src);
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -13,8 +13,7 @@
|
|||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdlib.h> /* malloc, calloc, free */
|
||||
#include <string.h> /* memset */
|
||||
#define ZSTD_DEPS_NEED_MALLOC
|
||||
#include "error_private.h"
|
||||
#include "zstd_internal.h"
|
||||
|
||||
|
|
@ -47,37 +46,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
|
|||
/*! ZSTD_getErrorString() :
|
||||
* provides error code string from enum */
|
||||
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
|
||||
|
||||
|
||||
|
||||
/*=**************************************************************
|
||||
* Custom allocator
|
||||
****************************************************************/
|
||||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc)
|
||||
return customMem.customAlloc(customMem.opaque, size);
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
|
||||
{
|
||||
if (customMem.customAlloc) {
|
||||
/* calloc implemented as malloc+memset;
|
||||
* not as efficient as calloc, but next best guess for custom malloc */
|
||||
void* const ptr = customMem.customAlloc(customMem.opaque, size);
|
||||
memset(ptr, 0, size);
|
||||
return ptr;
|
||||
}
|
||||
return calloc(1, size);
|
||||
}
|
||||
|
||||
void ZSTD_free(void* ptr, ZSTD_customMem customMem)
|
||||
{
|
||||
if (ptr!=NULL) {
|
||||
if (customMem.customFree)
|
||||
customMem.customFree(customMem.opaque, ptr);
|
||||
else
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -13,11 +13,36 @@
|
|||
***************************************/
|
||||
#include "zstd_compress_literals.h"
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
* Debug Traces
|
||||
****************************************************************/
|
||||
#if DEBUGLEVEL >= 2
|
||||
|
||||
static size_t showHexa(const void* src, size_t srcSize)
|
||||
{
|
||||
const BYTE* const ip = (const BYTE*)src;
|
||||
size_t u;
|
||||
for (u=0; u<srcSize; u++) {
|
||||
RAWLOG(5, " %02X", ip[u]); (void)ip;
|
||||
}
|
||||
RAWLOG(5, " \n");
|
||||
return srcSize;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
* Literals compression - special cases
|
||||
****************************************************************/
|
||||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE* const)dst;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||
|
||||
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
|
||||
|
||||
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
|
||||
|
||||
switch(flSize)
|
||||
|
|
@ -35,17 +60,31 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
|||
assert(0);
|
||||
}
|
||||
|
||||
memcpy(ostart + flSize, src, srcSize);
|
||||
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
||||
ZSTD_memcpy(ostart + flSize, src, srcSize);
|
||||
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
||||
return srcSize + flSize;
|
||||
}
|
||||
|
||||
static int allBytesIdentical(const void* src, size_t srcSize)
|
||||
{
|
||||
assert(srcSize >= 1);
|
||||
assert(src != NULL);
|
||||
{ const BYTE b = ((const BYTE*)src)[0];
|
||||
size_t p;
|
||||
for (p=1; p<srcSize; p++) {
|
||||
if (((const BYTE*)src)[p] != b) return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE* const)dst;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||
|
||||
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
|
||||
assert(dstCapacity >= 4); (void)dstCapacity;
|
||||
assert(allBytesIdentical(src, srcSize));
|
||||
|
||||
switch(flSize)
|
||||
{
|
||||
|
|
@ -63,68 +102,103 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
|
|||
}
|
||||
|
||||
ostart[flSize] = *(const BYTE*)src;
|
||||
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
|
||||
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
|
||||
return flSize+1;
|
||||
}
|
||||
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2)
|
||||
/* ZSTD_minLiteralsToCompress() :
|
||||
* returns minimal amount of literals
|
||||
* for literal compression to even be attempted.
|
||||
* Minimum is made tighter as compression strategy increases.
|
||||
*/
|
||||
static size_t
|
||||
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
|
||||
{
|
||||
assert((int)strategy >= 0);
|
||||
assert((int)strategy <= 9);
|
||||
/* btultra2 : min 8 bytes;
|
||||
* then 2x larger for each successive compression strategy
|
||||
* max threshold 64 bytes */
|
||||
{ int const shift = MIN(9-(int)strategy, 3);
|
||||
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
|
||||
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
|
||||
return mintc;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_compressLiterals (
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy,
|
||||
int disableLiteralCompression,
|
||||
int suspectUncompressible,
|
||||
int bmi2)
|
||||
{
|
||||
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 singleStream = srcSize < 256;
|
||||
symbolEncodingType_e hType = set_compressed;
|
||||
size_t cLitSize;
|
||||
|
||||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
|
||||
disableLiteralCompression, (U32)srcSize);
|
||||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
|
||||
disableLiteralCompression, (U32)srcSize, dstCapacity);
|
||||
|
||||
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
if (disableLiteralCompression)
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
|
||||
/* small ? don't even attempt compression (speed opt) */
|
||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
/* if too small, don't even attempt compression (speed opt) */
|
||||
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
|
||||
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
|
||||
{ HUF_repeat repeat = prevHuf->repeatMode;
|
||||
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
||||
int const flags = 0
|
||||
| (bmi2 ? HUF_flags_bmi2 : 0)
|
||||
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
|
||||
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
|
||||
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
|
||||
|
||||
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
|
||||
huf_compress_f huf_compress;
|
||||
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
||||
cLitSize = singleStream ?
|
||||
HUF_compress1X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
||||
HUF_compress4X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
|
||||
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
|
||||
src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, LitHufLog,
|
||||
entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable,
|
||||
&repeat, flags);
|
||||
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
|
||||
if (repeat != HUF_repeat_none) {
|
||||
/* reused the existing table */
|
||||
DEBUGLOG(5, "Reusing previous huffman table");
|
||||
DEBUGLOG(5, "reusing statistics from previous huffman block");
|
||||
hType = set_repeat;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
} }
|
||||
if (cLitSize==1) {
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
/* A return value of 1 signals that the alphabet consists of a single symbol.
|
||||
* However, in some rare circumstances, it could be the compressed size (a single byte).
|
||||
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
|
||||
* (it's also necessary to not generate statistics).
|
||||
* Therefore, in such a case, actively check that all bytes are identical. */
|
||||
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||
} }
|
||||
|
||||
if (hType == set_compressed) {
|
||||
/* using a newly constructed table */
|
||||
|
|
@ -135,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|||
switch(lhSize)
|
||||
{
|
||||
case 3: /* 2 - 2 - 10 - 10 */
|
||||
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
||||
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
||||
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
||||
MEM_writeLE24(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 4: /* 2 - 2 - 14 - 14 */
|
||||
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
||||
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 5: /* 2 - 2 - 18 - 18 */
|
||||
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
|
||||
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
ostart[4] = (BYTE)(cLitSize >> 10);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -16,14 +16,24 @@
|
|||
|
||||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
/* ZSTD_compressRleLiteralsBlock() :
|
||||
* Conditions :
|
||||
* - All bytes in @src are identical
|
||||
* - dstCapacity >= 4 */
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
/* ZSTD_compressLiterals():
|
||||
* @entropyWorkspace: must be aligned on 4-bytes boundaries
|
||||
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
|
||||
* @suspectUncompressible: sampling checks, to potentially skip huffman coding
|
||||
*/
|
||||
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2);
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
int suspectUncompressible,
|
||||
int bmi2);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
|
|||
return maxSymbolValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if we should use ncount=-1 else we should
|
||||
* use ncount=1 for low probability symbols instead.
|
||||
*/
|
||||
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
|
||||
{
|
||||
/* Heuristic: This should cover most blocks <= 16K and
|
||||
* start to fade out after 16K to about 32K depending on
|
||||
* compressibility.
|
||||
*/
|
||||
return nbSeq >= 2048;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost in bytes of encoding the normalized count header.
|
||||
* Returns an error if any of the helper functions return an error.
|
||||
|
|
@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
|||
BYTE wksp[FSE_NCOUNTBOUND];
|
||||
S16 norm[MaxSeq + 1];
|
||||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
|
||||
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
||||
}
|
||||
|
||||
|
|
@ -72,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
|
|||
{
|
||||
unsigned cost = 0;
|
||||
unsigned s;
|
||||
|
||||
assert(total > 0);
|
||||
for (s = 0; s <= max; ++s) {
|
||||
unsigned norm = (unsigned)((256 * count[s]) / total);
|
||||
if (count[s] != 0 && norm == 0)
|
||||
|
|
@ -151,7 +166,7 @@ ZSTD_selectEncodingType(
|
|||
if (mostFrequent == nbSeq) {
|
||||
*repeatMode = FSE_repeat_none;
|
||||
if (isDefaultAllowed && nbSeq <= 2) {
|
||||
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
||||
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
|
||||
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
||||
* If basic encoding isn't possible, always choose RLE.
|
||||
*/
|
||||
|
|
@ -219,6 +234,11 @@ ZSTD_selectEncodingType(
|
|||
return set_compressed;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
S16 norm[MaxSeq + 1];
|
||||
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
|
||||
} ZSTD_BuildCTableWksp;
|
||||
|
||||
size_t
|
||||
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
||||
|
|
@ -239,13 +259,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|||
*op = codeTable[0];
|
||||
return 1;
|
||||
case set_repeat:
|
||||
memcpy(nextCTable, prevCTable, prevCTableSize);
|
||||
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
|
||||
return 0;
|
||||
case set_basic:
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
|
||||
return 0;
|
||||
case set_compressed: {
|
||||
S16 norm[MaxSeq + 1];
|
||||
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
|
||||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
||||
if (count[codeTable[nbSeq-1]] > 1) {
|
||||
|
|
@ -253,10 +273,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|||
nbSeq_1--;
|
||||
}
|
||||
assert(nbSeq_1 > 1);
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
|
||||
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
||||
(void)entropyWorkspaceSize;
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
||||
assert(oend >= op);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
||||
return NCountSize;
|
||||
}
|
||||
}
|
||||
|
|
@ -290,19 +313,19 @@ ZSTD_encodeSequences_body(
|
|||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
U32 const ofBits = ofCodeTable[nbSeq-1];
|
||||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
|
||||
BIT_flushBits(&blockStream);
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
|
||||
ofBits - extraBits);
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
|
||||
}
|
||||
BIT_flushBits(&blockStream);
|
||||
|
||||
|
|
@ -316,8 +339,8 @@ ZSTD_encodeSequences_body(
|
|||
U32 const mlBits = ML_bits[mlCode];
|
||||
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
||||
(unsigned)sequences[n].litLength,
|
||||
(unsigned)sequences[n].matchLength + MINMATCH,
|
||||
(unsigned)sequences[n].offset);
|
||||
(unsigned)sequences[n].mlBase + MINMATCH,
|
||||
(unsigned)sequences[n].offBase);
|
||||
/* 32b*/ /* 64b*/
|
||||
/* (7)*/ /* (7)*/
|
||||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
||||
|
|
@ -328,18 +351,18 @@ ZSTD_encodeSequences_body(
|
|||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
||||
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
||||
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
|
||||
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
||||
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
||||
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
|
||||
ofBits - extraBits); /* 31 */
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
||||
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
|
||||
}
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
||||
|
|
@ -376,7 +399,7 @@ ZSTD_encodeSequences_default(
|
|||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
ZSTD_encodeSequences_bmi2(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -15,288 +15,10 @@
|
|||
|
||||
#include "zstd_internal.h" /* ZSTD_getSequenceLength */
|
||||
#include "hist.h" /* HIST_countFast_wksp */
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
|
||||
#include "zstd_compress_sequences.h"
|
||||
#include "zstd_compress_literals.h"
|
||||
|
||||
/*-*************************************
|
||||
* Superblock entropy buffer structs
|
||||
***************************************/
|
||||
/** ZSTD_hufCTablesMetadata_t :
|
||||
* Stores Literals Block Type for a super-block in hType, and
|
||||
* huffman tree description in hufDesBuffer.
|
||||
* hufDesSize refers to the size of huffman tree description in bytes.
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e hType;
|
||||
BYTE hufDesBuffer[500]; /* TODO give name to this value */
|
||||
size_t hufDesSize;
|
||||
} ZSTD_hufCTablesMetadata_t;
|
||||
|
||||
/** ZSTD_fseCTablesMetadata_t :
|
||||
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
||||
* fse tables in fseTablesBuffer.
|
||||
* fseTablesSize refers to the size of fse tables in bytes.
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e llType;
|
||||
symbolEncodingType_e ofType;
|
||||
symbolEncodingType_e mlType;
|
||||
BYTE fseTablesBuffer[500]; /* TODO give name to this value */
|
||||
size_t fseTablesSize;
|
||||
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
|
||||
} ZSTD_fseCTablesMetadata_t;
|
||||
|
||||
typedef struct {
|
||||
ZSTD_hufCTablesMetadata_t hufMetadata;
|
||||
ZSTD_fseCTablesMetadata_t fseMetadata;
|
||||
} ZSTD_entropyCTablesMetadata_t;
|
||||
|
||||
|
||||
/** ZSTD_buildSuperBlockEntropy_literal() :
|
||||
* Builds entropy for the super-block literals.
|
||||
* Stores literals block type (raw, rle, compressed, repeat) and
|
||||
* huffman description table to hufMetadata.
|
||||
* @return : size of huffman description table or error code */
|
||||
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const int disableLiteralsCompression,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const wkspStart = (BYTE*)workspace;
|
||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
||||
BYTE* const countWkspStart = wkspStart;
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
||||
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
||||
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
||||
unsigned maxSymbolValue = 255;
|
||||
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
||||
HUF_repeat repeat = prevHuf->repeatMode;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
if (disableLiteralsCompression) {
|
||||
DEBUGLOG(5, "set_basic - disabled");
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* small ? don't even attempt compression (speed opt) */
|
||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||
if (srcSize <= minLitSize) {
|
||||
DEBUGLOG(5, "set_basic - too small");
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
||||
if (largest == srcSize) {
|
||||
DEBUGLOG(5, "set_rle");
|
||||
hufMetadata->hType = set_rle;
|
||||
return 0;
|
||||
}
|
||||
if (largest <= (srcSize >> 7)+4) {
|
||||
DEBUGLOG(5, "set_basic - no gain");
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate the previous Huffman table */
|
||||
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
||||
repeat = HUF_repeat_none;
|
||||
}
|
||||
|
||||
/* Build Huffman Tree */
|
||||
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
||||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
||||
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
||||
maxSymbolValue, huffLog,
|
||||
nodeWksp, nodeWkspSize);
|
||||
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
||||
huffLog = (U32)maxBits;
|
||||
{ /* Build and write the CTable */
|
||||
size_t const newCSize = HUF_estimateCompressedSize(
|
||||
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
||||
size_t const hSize = HUF_writeCTable(
|
||||
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
||||
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
|
||||
/* Check against repeating the previous CTable */
|
||||
if (repeat != HUF_repeat_none) {
|
||||
size_t const oldCSize = HUF_estimateCompressedSize(
|
||||
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
||||
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
||||
DEBUGLOG(5, "set_repeat - smaller");
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
hufMetadata->hType = set_repeat;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (newCSize + hSize >= srcSize) {
|
||||
DEBUGLOG(5, "set_basic - no gains");
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
||||
hufMetadata->hType = set_compressed;
|
||||
nextHuf->repeatMode = HUF_repeat_check;
|
||||
return hSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_buildSuperBlockEntropy_sequences() :
|
||||
* Builds entropy for the super-block sequences.
|
||||
* Stores symbol compression modes and fse table to fseMetadata.
|
||||
* @return : size of fse tables or error code */
|
||||
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
||||
const ZSTD_fseCTables_t* prevEntropy,
|
||||
ZSTD_fseCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const wkspStart = (BYTE*)workspace;
|
||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
||||
BYTE* const countWkspStart = wkspStart;
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
||||
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
||||
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
||||
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
||||
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
||||
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
||||
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
||||
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
||||
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
||||
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
||||
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
||||
BYTE* op = ostart;
|
||||
|
||||
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
||||
memset(workspace, 0, wkspSize);
|
||||
|
||||
fseMetadata->lastCountSize = 0;
|
||||
/* convert length/distances into codes */
|
||||
ZSTD_seqToCodes(seqStorePtr);
|
||||
/* build CTable for Literal Lengths */
|
||||
{ U32 LLtype;
|
||||
unsigned max = MaxLL;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
DEBUGLOG(5, "Building LL table");
|
||||
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
||||
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
LLFSELog, prevEntropy->litlengthCTable,
|
||||
LL_defaultNorm, LL_defaultNormLog,
|
||||
ZSTD_defaultAllowed, strategy);
|
||||
assert(set_basic < set_compressed && set_rle < set_compressed);
|
||||
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
||||
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
||||
if (LLtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->llType = (symbolEncodingType_e) LLtype;
|
||||
} }
|
||||
/* build CTable for Offsets */
|
||||
{ U32 Offtype;
|
||||
unsigned max = MaxOff;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
||||
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
||||
DEBUGLOG(5, "Building OF table");
|
||||
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
||||
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
OffFSELog, prevEntropy->offcodeCTable,
|
||||
OF_defaultNorm, OF_defaultNormLog,
|
||||
defaultPolicy, strategy);
|
||||
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
||||
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
||||
if (Offtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
|
||||
} }
|
||||
/* build CTable for MatchLengths */
|
||||
{ U32 MLtype;
|
||||
unsigned max = MaxML;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
|
||||
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
||||
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
MLFSELog, prevEntropy->matchlengthCTable,
|
||||
ML_defaultNorm, ML_defaultNormLog,
|
||||
ZSTD_defaultAllowed, strategy);
|
||||
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
||||
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
||||
if (MLtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
|
||||
} }
|
||||
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_buildSuperBlockEntropy() :
|
||||
* Builds entropy for the super-block.
|
||||
* @return : 0 on success or error code */
|
||||
static size_t
|
||||
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* prevEntropy,
|
||||
ZSTD_entropyCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
|
||||
entropyMetadata->hufMetadata.hufDesSize =
|
||||
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
|
||||
&prevEntropy->huf, &nextEntropy->huf,
|
||||
&entropyMetadata->hufMetadata,
|
||||
ZSTD_disableLiteralsCompression(cctxParams),
|
||||
workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
|
||||
entropyMetadata->fseMetadata.fseTablesSize =
|
||||
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
|
||||
&prevEntropy->fse, &nextEntropy->fse,
|
||||
cctxParams,
|
||||
&entropyMetadata->fseMetadata,
|
||||
workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock_literal() :
|
||||
* Compresses literals section for a sub-block.
|
||||
* When we have to write the Huffman table we will sometimes choose a header
|
||||
|
|
@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|||
* before we know the table size + compressed size, so we have a bound on the
|
||||
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
|
||||
*
|
||||
* We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
|
||||
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
|
||||
* in writing the header, otherwise it is set to 0.
|
||||
*
|
||||
* hufMetadata->hType has literals block type info.
|
||||
|
|
@ -314,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|||
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
|
||||
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
|
||||
* @return : compressed size of literals section of a sub-block
|
||||
* Or 0 if it unable to compress.
|
||||
* Or 0 if unable to compress.
|
||||
* Or error code */
|
||||
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const BYTE* literals, size_t litSize,
|
||||
void* dst, size_t dstSize,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
static size_t
|
||||
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const BYTE* literals, size_t litSize,
|
||||
void* dst, size_t dstSize,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
{
|
||||
size_t const header = writeEntropy ? 200 : 0;
|
||||
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
|
||||
|
|
@ -331,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|||
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
|
||||
size_t cLitSize = 0;
|
||||
|
||||
(void)bmi2; /* TODO bmi2... */
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
|
||||
|
||||
*entropyWritten = 0;
|
||||
|
|
@ -348,15 +69,15 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|||
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
|
||||
|
||||
if (writeEntropy && hufMetadata->hType == set_compressed) {
|
||||
memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
||||
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
||||
op += hufMetadata->hufDesSize;
|
||||
cLitSize += hufMetadata->hufDesSize;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
||||
}
|
||||
|
||||
/* TODO bmi2 */
|
||||
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
|
||||
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
|
||||
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
|
||||
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
|
||||
: HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
|
||||
op += cSize;
|
||||
cLitSize += cSize;
|
||||
if (cSize == 0 || ERR_isError(cSize)) {
|
||||
|
|
@ -381,7 +102,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|||
switch(lhSize)
|
||||
{
|
||||
case 3: /* 2 - 2 - 10 - 10 */
|
||||
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
||||
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
||||
MEM_writeLE24(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
|
|
@ -401,25 +122,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|||
}
|
||||
*entropyWritten = 1;
|
||||
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
|
||||
return op-ostart;
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
|
||||
const seqDef* const sstart = sequences;
|
||||
const seqDef* const send = sequences + nbSeq;
|
||||
const seqDef* sp = sstart;
|
||||
static size_t
|
||||
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
|
||||
const seqDef* sequences, size_t nbSeqs,
|
||||
size_t litSize, int lastSubBlock)
|
||||
{
|
||||
size_t matchLengthSum = 0;
|
||||
size_t litLengthSum = 0;
|
||||
while (send-sp > 0) {
|
||||
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
||||
size_t n;
|
||||
for (n=0; n<nbSeqs; n++) {
|
||||
const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
|
||||
litLengthSum += seqLen.litLength;
|
||||
matchLengthSum += seqLen.matchLength;
|
||||
sp++;
|
||||
}
|
||||
assert(litLengthSum <= litSize);
|
||||
if (!lastSequence) {
|
||||
DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
|
||||
(unsigned)nbSeqs, (const void*)sequences,
|
||||
(unsigned)litLengthSum, (unsigned)matchLengthSum);
|
||||
if (!lastSubBlock)
|
||||
assert(litLengthSum == litSize);
|
||||
}
|
||||
else
|
||||
assert(litLengthSum <= litSize);
|
||||
(void)litLengthSum;
|
||||
return matchLengthSum + litSize;
|
||||
}
|
||||
|
||||
|
|
@ -433,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
|||
* @return : compressed size of sequences section of a sub-block
|
||||
* Or 0 if it is unable to compress
|
||||
* Or error code. */
|
||||
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
||||
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
static size_t
|
||||
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
||||
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, int writeEntropy, int* entropyWritten)
|
||||
{
|
||||
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
|
|
@ -453,14 +180,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|||
/* Sequences Header */
|
||||
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
|
||||
dstSize_tooSmall, "");
|
||||
if (nbSeq < 0x7F)
|
||||
if (nbSeq < 128)
|
||||
*op++ = (BYTE)nbSeq;
|
||||
else if (nbSeq < LONGNBSEQ)
|
||||
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
|
||||
else
|
||||
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
||||
if (nbSeq==0) {
|
||||
return op - ostart;
|
||||
return (size_t)(op - ostart);
|
||||
}
|
||||
|
||||
/* seqHead : flags for FSE encoding type */
|
||||
|
|
@ -474,7 +201,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|||
const U32 MLtype = fseMetadata->mlType;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
|
||||
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
|
||||
memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
||||
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
||||
op += fseMetadata->fseTablesSize;
|
||||
} else {
|
||||
const U32 repeat = set_repeat;
|
||||
|
|
@ -482,7 +209,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|||
}
|
||||
|
||||
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
||||
op, oend - op,
|
||||
op, (size_t)(oend - op),
|
||||
fseTables->matchlengthCTable, mlCode,
|
||||
fseTables->offcodeCTable, ofCode,
|
||||
fseTables->litlengthCTable, llCode,
|
||||
|
|
@ -526,7 +253,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|||
#endif
|
||||
|
||||
*entropyWritten = 1;
|
||||
return op - ostart;
|
||||
return (size_t)(op - ostart);
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock() :
|
||||
|
|
@ -552,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|||
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
|
||||
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
|
||||
&entropyMetadata->hufMetadata, literals, litSize,
|
||||
op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
|
||||
op, (size_t)(oend-op),
|
||||
bmi2, writeLitEntropy, litEntropyWritten);
|
||||
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
|
||||
if (cLitSize == 0) return 0;
|
||||
op += cLitSize;
|
||||
|
|
@ -562,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|||
sequences, nbSeq,
|
||||
llCode, mlCode, ofCode,
|
||||
cctxParams,
|
||||
op, oend-op,
|
||||
op, (size_t)(oend-op),
|
||||
bmi2, writeSeqEntropy, seqEntropyWritten);
|
||||
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
|
||||
if (cSeqSize == 0) return 0;
|
||||
op += cSeqSize;
|
||||
}
|
||||
/* Write block header */
|
||||
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
|
||||
{ size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
|
||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||
MEM_writeLE24(ostart, cBlockHeader24);
|
||||
}
|
||||
return op-ostart;
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
||||
|
|
@ -602,8 +330,8 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
|||
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||
const BYTE* codeTable, unsigned maxCode,
|
||||
size_t nbSeq, const FSE_CTable* fseCTable,
|
||||
const U32* additionalBits,
|
||||
short const* defaultNorm, U32 defaultNormLog,
|
||||
const U8* additionalBits,
|
||||
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
|
|
@ -615,7 +343,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|||
|
||||
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
if (type == set_basic) {
|
||||
cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
|
||||
/* We selected this encoding type, so it must be valid. */
|
||||
assert(max <= defaultMax);
|
||||
cSymbolTypeSizeEstimateInBits = max <= defaultMax
|
||||
? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
|
||||
: ERROR(GENERIC);
|
||||
} else if (type == set_rle) {
|
||||
cSymbolTypeSizeEstimateInBits = 0;
|
||||
} else if (type == set_compressed || type == set_repeat) {
|
||||
|
|
@ -639,25 +371,30 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|||
void* workspace, size_t wkspSize,
|
||||
int writeEntropy)
|
||||
{
|
||||
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
||||
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
||||
size_t cSeqSizeEstimate = 0;
|
||||
if (nbSeq == 0) return sequencesSectionHeaderSize;
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
||||
nbSeq, fseTables->offcodeCTable, NULL,
|
||||
OF_defaultNorm, OF_defaultNormLog,
|
||||
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
workspace, wkspSize);
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
|
||||
nbSeq, fseTables->litlengthCTable, LL_bits,
|
||||
LL_defaultNorm, LL_defaultNormLog,
|
||||
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||
workspace, wkspSize);
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
|
||||
nbSeq, fseTables->matchlengthCTable, ML_bits,
|
||||
ML_defaultNorm, ML_defaultNormLog,
|
||||
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||
workspace, wkspSize);
|
||||
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
||||
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
||||
}
|
||||
|
||||
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
||||
typedef struct {
|
||||
size_t estLitSize;
|
||||
size_t estBlockSize;
|
||||
} EstimatedBlockSize;
|
||||
static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
||||
const BYTE* ofCodeTable,
|
||||
const BYTE* llCodeTable,
|
||||
const BYTE* mlCodeTable,
|
||||
|
|
@ -665,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
|||
const ZSTD_entropyCTables_t* entropy,
|
||||
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize,
|
||||
int writeLitEntropy, int writeSeqEntropy) {
|
||||
size_t cSizeEstimate = 0;
|
||||
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
|
||||
&entropy->huf, &entropyMetadata->hufMetadata,
|
||||
workspace, wkspSize, writeLitEntropy);
|
||||
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
||||
int writeLitEntropy, int writeSeqEntropy)
|
||||
{
|
||||
EstimatedBlockSize ebs;
|
||||
ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
|
||||
&entropy->huf, &entropyMetadata->hufMetadata,
|
||||
workspace, wkspSize, writeLitEntropy);
|
||||
ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
||||
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
||||
workspace, wkspSize, writeSeqEntropy);
|
||||
return cSizeEstimate + ZSTD_blockHeaderSize;
|
||||
ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
|
||||
return ebs;
|
||||
}
|
||||
|
||||
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
|
||||
|
|
@ -687,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
|
|||
return 0;
|
||||
}
|
||||
|
||||
static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
|
||||
{
|
||||
size_t n, total = 0;
|
||||
assert(sp != NULL);
|
||||
for (n=0; n<seqCount; n++) {
|
||||
total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
|
||||
}
|
||||
DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
|
||||
return total;
|
||||
}
|
||||
|
||||
#define BYTESCALE 256
|
||||
|
||||
static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
|
||||
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
|
||||
int firstSubBlock)
|
||||
{
|
||||
size_t n, budget = 0, inSize=0;
|
||||
/* entropy headers */
|
||||
size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
|
||||
assert(firstSubBlock==0 || firstSubBlock==1);
|
||||
budget += headerSize;
|
||||
|
||||
/* first sequence => at least one sequence*/
|
||||
budget += sp[0].litLength * avgLitCost + avgSeqCost;
|
||||
if (budget > targetBudget) return 1;
|
||||
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
|
||||
|
||||
/* loop over sequences */
|
||||
for (n=1; n<nbSeqs; n++) {
|
||||
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
|
||||
budget += currentCost;
|
||||
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
|
||||
/* stop when sub-block budget is reached */
|
||||
if ( (budget > targetBudget)
|
||||
/* though continue to expand until the sub-block is deemed compressible */
|
||||
&& (budget < inSize * BYTESCALE) )
|
||||
break;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock_multi() :
|
||||
* Breaks super-block into multiple sub-blocks and compresses them.
|
||||
* Entropy will be written to the first block.
|
||||
* The following blocks will use repeat mode to compress.
|
||||
* All sub-blocks are compressed blocks (no raw or rle blocks).
|
||||
* @return : compressed size of the super block (which is multiple ZSTD blocks)
|
||||
* Or 0 if it failed to compress. */
|
||||
* Entropy will be written into the first block.
|
||||
* The following blocks use repeat_mode to compress.
|
||||
* Sub-blocks are all compressed, except the last one when beneficial.
|
||||
* @return : compressed size of the super block (which features multiple ZSTD blocks)
|
||||
* or 0 if it failed to compress. */
|
||||
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
const ZSTD_compressedBlockState_t* prevCBlock,
|
||||
ZSTD_compressedBlockState_t* nextCBlock,
|
||||
|
|
@ -706,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|||
{
|
||||
const seqDef* const sstart = seqStorePtr->sequencesStart;
|
||||
const seqDef* const send = seqStorePtr->sequences;
|
||||
const seqDef* sp = sstart;
|
||||
const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
|
||||
size_t const nbSeqs = (size_t)(send - sstart);
|
||||
const BYTE* const lstart = seqStorePtr->litStart;
|
||||
const BYTE* const lend = seqStorePtr->lit;
|
||||
const BYTE* lp = lstart;
|
||||
size_t const nbLiterals = (size_t)(lend - lstart);
|
||||
BYTE const* ip = (BYTE const*)src;
|
||||
BYTE const* const iend = ip + srcSize;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
|
|
@ -718,120 +502,179 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|||
const BYTE* llCodePtr = seqStorePtr->llCode;
|
||||
const BYTE* mlCodePtr = seqStorePtr->mlCode;
|
||||
const BYTE* ofCodePtr = seqStorePtr->ofCode;
|
||||
size_t targetCBlockSize = cctxParams->targetCBlockSize;
|
||||
size_t litSize, seqCount;
|
||||
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
|
||||
size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
|
||||
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
|
||||
int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
|
||||
int writeSeqEntropy = 1;
|
||||
int lastSequence = 0;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
|
||||
(unsigned)(lend-lp), (unsigned)(send-sstart));
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
|
||||
(unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
|
||||
|
||||
litSize = 0;
|
||||
seqCount = 0;
|
||||
do {
|
||||
size_t cBlockSizeEstimate = 0;
|
||||
if (sstart == send) {
|
||||
lastSequence = 1;
|
||||
} else {
|
||||
const seqDef* const sequence = sp + seqCount;
|
||||
lastSequence = sequence == send - 1;
|
||||
litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
|
||||
seqCount++;
|
||||
/* let's start by a general estimation for the full block */
|
||||
if (nbSeqs > 0) {
|
||||
EstimatedBlockSize const ebs =
|
||||
ZSTD_estimateSubBlockSize(lp, nbLiterals,
|
||||
ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
|
||||
&nextCBlock->entropy, entropyMetadata,
|
||||
workspace, wkspSize,
|
||||
writeLitEntropy, writeSeqEntropy);
|
||||
/* quick estimation */
|
||||
size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
|
||||
size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
|
||||
const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
|
||||
size_t n, avgBlockBudget, blockBudgetSupp=0;
|
||||
avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
|
||||
DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
|
||||
(unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
|
||||
(unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
|
||||
/* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
|
||||
* this will result in the production of a single uncompressed block covering @srcSize.*/
|
||||
if (ebs.estBlockSize > srcSize) return 0;
|
||||
|
||||
/* compress and write sub-blocks */
|
||||
assert(nbSubBlocks>0);
|
||||
for (n=0; n < nbSubBlocks-1; n++) {
|
||||
/* determine nb of sequences for current sub-block + nbLiterals from next sequence */
|
||||
size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
|
||||
avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
|
||||
/* if reached last sequence : break to last sub-block (simplification) */
|
||||
assert(seqCount <= (size_t)(send-sp));
|
||||
if (sp + seqCount == send) break;
|
||||
assert(seqCount > 0);
|
||||
/* compress sub-block */
|
||||
{ int litEntropyWritten = 0;
|
||||
int seqEntropyWritten = 0;
|
||||
size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
|
||||
const size_t decompressedSize =
|
||||
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
|
||||
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
||||
sp, seqCount,
|
||||
lp, litSize,
|
||||
llCodePtr, mlCodePtr, ofCodePtr,
|
||||
cctxParams,
|
||||
op, (size_t)(oend-op),
|
||||
bmi2, writeLitEntropy, writeSeqEntropy,
|
||||
&litEntropyWritten, &seqEntropyWritten,
|
||||
0);
|
||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
||||
|
||||
/* check compressibility, update state components */
|
||||
if (cSize > 0 && cSize < decompressedSize) {
|
||||
DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
|
||||
(unsigned)decompressedSize, (unsigned)cSize);
|
||||
assert(ip + decompressedSize <= iend);
|
||||
ip += decompressedSize;
|
||||
lp += litSize;
|
||||
op += cSize;
|
||||
llCodePtr += seqCount;
|
||||
mlCodePtr += seqCount;
|
||||
ofCodePtr += seqCount;
|
||||
/* Entropy only needs to be written once */
|
||||
if (litEntropyWritten) {
|
||||
writeLitEntropy = 0;
|
||||
}
|
||||
if (seqEntropyWritten) {
|
||||
writeSeqEntropy = 0;
|
||||
}
|
||||
sp += seqCount;
|
||||
blockBudgetSupp = 0;
|
||||
} }
|
||||
/* otherwise : do not compress yet, coalesce current sub-block with following one */
|
||||
}
|
||||
if (lastSequence) {
|
||||
assert(lp <= lend);
|
||||
assert(litSize <= (size_t)(lend - lp));
|
||||
litSize = (size_t)(lend - lp);
|
||||
}
|
||||
/* I think there is an optimization opportunity here.
|
||||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
||||
* since it recalculates estimate from scratch.
|
||||
* For example, it would recount literal distribution and symbol codes everytime.
|
||||
*/
|
||||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
||||
&nextCBlock->entropy, entropyMetadata,
|
||||
workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
|
||||
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
|
||||
int litEntropyWritten = 0;
|
||||
int seqEntropyWritten = 0;
|
||||
const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
|
||||
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
||||
sp, seqCount,
|
||||
lp, litSize,
|
||||
llCodePtr, mlCodePtr, ofCodePtr,
|
||||
cctxParams,
|
||||
op, oend-op,
|
||||
bmi2, writeLitEntropy, writeSeqEntropy,
|
||||
&litEntropyWritten, &seqEntropyWritten,
|
||||
lastBlock && lastSequence);
|
||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
||||
if (cSize > 0 && cSize < decompressedSize) {
|
||||
DEBUGLOG(5, "Committed the sub-block");
|
||||
assert(ip + decompressedSize <= iend);
|
||||
ip += decompressedSize;
|
||||
sp += seqCount;
|
||||
lp += litSize;
|
||||
op += cSize;
|
||||
llCodePtr += seqCount;
|
||||
mlCodePtr += seqCount;
|
||||
ofCodePtr += seqCount;
|
||||
litSize = 0;
|
||||
seqCount = 0;
|
||||
/* Entropy only needs to be written once */
|
||||
if (litEntropyWritten) {
|
||||
writeLitEntropy = 0;
|
||||
}
|
||||
if (seqEntropyWritten) {
|
||||
writeSeqEntropy = 0;
|
||||
}
|
||||
} /* if (nbSeqs > 0) */
|
||||
|
||||
/* write last block */
|
||||
DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
|
||||
{ int litEntropyWritten = 0;
|
||||
int seqEntropyWritten = 0;
|
||||
size_t litSize = (size_t)(lend - lp);
|
||||
size_t seqCount = (size_t)(send - sp);
|
||||
const size_t decompressedSize =
|
||||
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
|
||||
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
||||
sp, seqCount,
|
||||
lp, litSize,
|
||||
llCodePtr, mlCodePtr, ofCodePtr,
|
||||
cctxParams,
|
||||
op, (size_t)(oend-op),
|
||||
bmi2, writeLitEntropy, writeSeqEntropy,
|
||||
&litEntropyWritten, &seqEntropyWritten,
|
||||
lastBlock);
|
||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
||||
|
||||
/* update pointers, the nb of literals borrowed from next sequence must be preserved */
|
||||
if (cSize > 0 && cSize < decompressedSize) {
|
||||
DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
|
||||
(unsigned)decompressedSize, (unsigned)cSize);
|
||||
assert(ip + decompressedSize <= iend);
|
||||
ip += decompressedSize;
|
||||
lp += litSize;
|
||||
op += cSize;
|
||||
llCodePtr += seqCount;
|
||||
mlCodePtr += seqCount;
|
||||
ofCodePtr += seqCount;
|
||||
/* Entropy only needs to be written once */
|
||||
if (litEntropyWritten) {
|
||||
writeLitEntropy = 0;
|
||||
}
|
||||
if (seqEntropyWritten) {
|
||||
writeSeqEntropy = 0;
|
||||
}
|
||||
sp += seqCount;
|
||||
}
|
||||
} while (!lastSequence);
|
||||
}
|
||||
|
||||
|
||||
if (writeLitEntropy) {
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
|
||||
memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
||||
DEBUGLOG(5, "Literal entropy tables were never written");
|
||||
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
||||
}
|
||||
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
|
||||
/* If we haven't written our entropy tables, then we've violated our contract and
|
||||
* must emit an uncompressed block.
|
||||
*/
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
|
||||
DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ip < iend) {
|
||||
size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
|
||||
/* some data left : last part of the block sent uncompressed */
|
||||
size_t const rSize = (size_t)((iend - ip));
|
||||
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
|
||||
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
|
||||
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
||||
assert(cSize != 0);
|
||||
op += cSize;
|
||||
/* We have to regenerate the repcodes because we've skipped some sequences */
|
||||
if (sp < send) {
|
||||
seqDef const* seq;
|
||||
const seqDef* seq;
|
||||
repcodes_t rep;
|
||||
memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
||||
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
||||
for (seq = sstart; seq < sp; ++seq) {
|
||||
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
}
|
||||
memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
||||
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
||||
}
|
||||
}
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
|
||||
return op-ostart;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
|
||||
(unsigned)(op-ostart));
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
void* dst, size_t dstCapacity,
|
||||
void const* src, size_t srcSize,
|
||||
unsigned lastBlock) {
|
||||
const void* src, size_t srcSize,
|
||||
unsigned lastBlock)
|
||||
{
|
||||
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
|
||||
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
|
||||
&zc->blockState.prevCBlock->entropy,
|
||||
&zc->blockState.nextCBlock->entropy,
|
||||
&zc->appliedParams,
|
||||
&entropyMetadata,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
||||
|
||||
return ZSTD_compressSubBlock_multi(&zc->seqStore,
|
||||
zc->blockState.prevCBlock,
|
||||
|
|
@ -841,5 +684,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
|||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
zc->bmi2, lastBlock,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -14,7 +14,9 @@
|
|||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
||||
#include "zstd_internal.h"
|
||||
#include "portability_macros.h"
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
|
|
@ -35,15 +37,30 @@ extern "C" {
|
|||
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
|
||||
#endif
|
||||
|
||||
|
||||
/* Set our tables and aligneds to align by 64 bytes */
|
||||
#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
|
||||
|
||||
/*-*************************************
|
||||
* Structures
|
||||
***************************************/
|
||||
typedef enum {
|
||||
ZSTD_cwksp_alloc_objects,
|
||||
ZSTD_cwksp_alloc_buffers,
|
||||
ZSTD_cwksp_alloc_aligned
|
||||
ZSTD_cwksp_alloc_aligned_init_once,
|
||||
ZSTD_cwksp_alloc_aligned,
|
||||
ZSTD_cwksp_alloc_buffers
|
||||
} ZSTD_cwksp_alloc_phase_e;
|
||||
|
||||
/**
|
||||
* Used to describe whether the workspace is statically allocated (and will not
|
||||
* necessarily ever be freed), or if it's dynamically allocated and we can
|
||||
* expect a well-formed caller to free this.
|
||||
*/
|
||||
typedef enum {
|
||||
ZSTD_cwksp_dynamic_alloc,
|
||||
ZSTD_cwksp_static_alloc
|
||||
} ZSTD_cwksp_static_alloc_e;
|
||||
|
||||
/**
|
||||
* Zstd fits all its internal datastructures into a single continuous buffer,
|
||||
* so that it only needs to perform a single OS allocation (or so that a buffer
|
||||
|
|
@ -84,15 +101,15 @@ typedef enum {
|
|||
*
|
||||
* Workspace Layout:
|
||||
*
|
||||
* [ ... workspace ... ]
|
||||
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
|
||||
* [ ... workspace ... ]
|
||||
* [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
|
||||
*
|
||||
* The various objects that live in the workspace are divided into the
|
||||
* following categories, and are allocated separately:
|
||||
*
|
||||
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
|
||||
* so that literally everything fits in a single buffer. Note: if present,
|
||||
* this must be the first object in the workspace, since ZSTD_free{CCtx,
|
||||
* this must be the first object in the workspace, since ZSTD_customFree{CCtx,
|
||||
* CDict}() rely on a pointer comparison to see whether one or two frees are
|
||||
* required.
|
||||
*
|
||||
|
|
@ -107,10 +124,20 @@ typedef enum {
|
|||
* - Tables: these are any of several different datastructures (hash tables,
|
||||
* chain tables, binary trees) that all respect a common format: they are
|
||||
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
|
||||
* Their sizes depend on the cparams.
|
||||
* Their sizes depend on the cparams. These tables are 64-byte aligned.
|
||||
*
|
||||
* - Aligned: these buffers are used for various purposes that require 4 byte
|
||||
* alignment, but don't require any initialization before they're used.
|
||||
* - Init once: these buffers require to be initialized at least once before
|
||||
* use. They should be used when we want to skip memory initialization
|
||||
* while not triggering memory checkers (like Valgrind) when reading from
|
||||
* from this memory without writing to it first.
|
||||
* These buffers should be used carefully as they might contain data
|
||||
* from previous compressions.
|
||||
* Buffers are aligned to 64 bytes.
|
||||
*
|
||||
* - Aligned: these buffers don't require any initialization before they're
|
||||
* used. The user of the buffer should make sure they write into a buffer
|
||||
* location before reading from it.
|
||||
* Buffers are aligned to 64 bytes.
|
||||
*
|
||||
* - Buffers: these buffers are used for various purposes that don't require
|
||||
* any alignment or initialization before they're used. This means they can
|
||||
|
|
@ -122,9 +149,9 @@ typedef enum {
|
|||
* correctly packed into the workspace buffer. That order is:
|
||||
*
|
||||
* 1. Objects
|
||||
* 2. Buffers
|
||||
* 3. Aligned
|
||||
* 4. Tables
|
||||
* 2. Init once / Tables
|
||||
* 3. Aligned / Tables
|
||||
* 4. Buffers / Tables
|
||||
*
|
||||
* Attempts to reserve objects of different types out of order will fail.
|
||||
*/
|
||||
|
|
@ -136,10 +163,12 @@ typedef struct {
|
|||
void* tableEnd;
|
||||
void* tableValidEnd;
|
||||
void* allocStart;
|
||||
void* initOnceStart;
|
||||
|
||||
int allocFailed;
|
||||
BYTE allocFailed;
|
||||
int workspaceOversizedDuration;
|
||||
ZSTD_cwksp_alloc_phase_e phase;
|
||||
ZSTD_cwksp_static_alloc_e isStatic;
|
||||
} ZSTD_cwksp;
|
||||
|
||||
/*-*************************************
|
||||
|
|
@ -147,6 +176,7 @@ typedef struct {
|
|||
***************************************/
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
|
||||
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
|
||||
(void)ws;
|
||||
|
|
@ -156,6 +186,21 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
|
|||
assert(ws->tableEnd <= ws->allocStart);
|
||||
assert(ws->tableValidEnd <= ws->allocStart);
|
||||
assert(ws->allocStart <= ws->workspaceEnd);
|
||||
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
|
||||
assert(ws->workspace <= ws->initOnceStart);
|
||||
#if ZSTD_MEMORY_SANITIZER
|
||||
{
|
||||
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
|
||||
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
|
||||
(void)offset;
|
||||
#if defined(ZSTD_MSAN_PRINT)
|
||||
if(offset!=-1) {
|
||||
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
|
||||
}
|
||||
#endif
|
||||
assert(offset==-1);
|
||||
};
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -176,63 +221,72 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
|
|||
* Since tables aren't currently redzoned, you don't need to call through this
|
||||
* to figure out how much space you need for the matchState tables. Everything
|
||||
* else is though.
|
||||
*
|
||||
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#else
|
||||
return size;
|
||||
#endif
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
|
||||
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
|
||||
assert(phase >= ws->phase);
|
||||
if (phase > ws->phase) {
|
||||
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
|
||||
phase >= ZSTD_cwksp_alloc_buffers) {
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
}
|
||||
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
|
||||
phase >= ZSTD_cwksp_alloc_aligned) {
|
||||
/* If unaligned allocations down from a too-large top have left us
|
||||
* unaligned, we need to realign our alloc ptr. Technically, this
|
||||
* can consume space that is unaccounted for in the neededSpace
|
||||
* calculation. However, I believe this can only happen when the
|
||||
* workspace is too large, and specifically when it is too large
|
||||
* by a larger margin than the space that will be consumed. */
|
||||
/* TODO: cleaner, compiler warning friendly way to do this??? */
|
||||
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
|
||||
if (ws->allocStart < ws->tableValidEnd) {
|
||||
ws->tableValidEnd = ws->allocStart;
|
||||
}
|
||||
}
|
||||
ws->phase = phase;
|
||||
}
|
||||
/**
|
||||
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
|
||||
* Used to determine the number of bytes required for a given "aligned".
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
|
||||
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether this object/buffer/etc was allocated in this workspace.
|
||||
* Returns the amount of additional space the cwksp must allocate
|
||||
* for internal purposes (currently only alignment).
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
|
||||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
|
||||
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
|
||||
/* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
|
||||
* bytes to align the beginning of tables section and end of buffers;
|
||||
*/
|
||||
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
|
||||
return slackSpace;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the number of additional bytes required to align a pointer to the given number of bytes.
|
||||
* alignBytes must be a power of two.
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
|
||||
size_t const alignBytesMask = alignBytes - 1;
|
||||
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
|
||||
assert((alignBytes & alignBytesMask) == 0);
|
||||
assert(bytes < alignBytes);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the initial value for allocStart which is used to determine the position from
|
||||
* which we can allocate from the end of the workspace.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
|
||||
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal function. Do not use directly.
|
||||
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
|
||||
* which counts from the end of the wksp (as opposed to the object/table segment).
|
||||
*
|
||||
* Returns a pointer to the beginning of that space.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
|
||||
void* alloc;
|
||||
void* bottom = ws->tableEnd;
|
||||
ZSTD_cwksp_internal_advance_phase(ws, phase);
|
||||
alloc = (BYTE *)ws->allocStart - bytes;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* over-reserve space */
|
||||
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#endif
|
||||
|
||||
MEM_STATIC void*
|
||||
ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
|
||||
{
|
||||
void* const alloc = (BYTE*)ws->allocStart - bytes;
|
||||
void* const bottom = ws->tableEnd;
|
||||
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
|
||||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
|
|
@ -242,16 +296,88 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
|||
ws->allocFailed = 1;
|
||||
return NULL;
|
||||
}
|
||||
/* the area is reserved from the end of wksp.
|
||||
* If it overlaps with tableValidEnd, it voids guarantees on values' range */
|
||||
if (alloc < ws->tableValidEnd) {
|
||||
ws->tableValidEnd = alloc;
|
||||
}
|
||||
ws->allocStart = alloc;
|
||||
return alloc;
|
||||
}
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/**
|
||||
* Moves the cwksp to the next phase, and does any necessary allocations.
|
||||
* cwksp initialization must necessarily go through each phase in order.
|
||||
* Returns a 0 on success, or zstd error
|
||||
*/
|
||||
MEM_STATIC size_t
|
||||
ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
|
||||
{
|
||||
assert(phase >= ws->phase);
|
||||
if (phase > ws->phase) {
|
||||
/* Going from allocating objects to allocating initOnce / tables */
|
||||
if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
|
||||
phase >= ZSTD_cwksp_alloc_aligned_init_once) {
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
|
||||
|
||||
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
|
||||
void *const alloc = ws->objectEnd;
|
||||
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
void *const objectEnd = (BYTE *) alloc + bytesToAlign;
|
||||
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
|
||||
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
|
||||
"table phase - alignment initial allocation failed!");
|
||||
ws->objectEnd = objectEnd;
|
||||
ws->tableEnd = objectEnd; /* table area starts being empty */
|
||||
if (ws->tableValidEnd < ws->tableEnd) {
|
||||
ws->tableValidEnd = ws->tableEnd;
|
||||
}
|
||||
}
|
||||
}
|
||||
ws->phase = phase;
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether this object/buffer/etc was allocated in this workspace.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
|
||||
{
|
||||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal function. Do not use directly.
|
||||
*/
|
||||
MEM_STATIC void*
|
||||
ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
|
||||
{
|
||||
void* alloc;
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* over-reserve space */
|
||||
bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#endif
|
||||
|
||||
alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
|
||||
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
|
||||
* either size. */
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
if (alloc) {
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
/* We need to keep the redzone poisoned while unpoisoning the bytes that
|
||||
* are actually allocated. */
|
||||
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc;
|
||||
|
|
@ -260,33 +386,78 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
|||
/**
|
||||
* Reserves and returns unaligned memory.
|
||||
*/
|
||||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
|
||||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
|
||||
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
||||
* This memory has been initialized at least once in the past.
|
||||
* This doesn't mean it has been initialized this time, and it might contain data from previous
|
||||
* operations.
|
||||
* The main usage is for algorithms that might need read access into uninitialized memory.
|
||||
* The algorithm must maintain safety under these conditions and must make sure it doesn't
|
||||
* leak any of the past data (directly or in side channels).
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
|
||||
assert((bytes & (sizeof(U32)-1)) == 0);
|
||||
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
|
||||
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
if(ptr && ptr < ws->initOnceStart) {
|
||||
/* We assume the memory following the current allocation is either:
|
||||
* 1. Not usable as initOnce memory (end of workspace)
|
||||
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
|
||||
* 3. An ASAN redzone, in which case we don't want to write on it
|
||||
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
|
||||
* Note that we assume here that MSAN and ASAN cannot run in the same time. */
|
||||
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
|
||||
ws->initOnceStart = ptr;
|
||||
}
|
||||
#if ZSTD_MEMORY_SANITIZER
|
||||
assert(__msan_test_shadow(ptr, bytes) == -1);
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligned on sizeof(unsigned). These buffers have the special property that
|
||||
* their values remain constrained, allowing us to re-use them without
|
||||
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
|
||||
ZSTD_cwksp_alloc_aligned);
|
||||
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligned on 64 bytes. These buffers have the special property that
|
||||
* their values remain constrained, allowing us to reuse them without
|
||||
* memset()-ing them.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
||||
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
|
||||
void* alloc = ws->tableEnd;
|
||||
void* end = (BYTE *)alloc + bytes;
|
||||
void* top = ws->allocStart;
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
|
||||
void* alloc;
|
||||
void* end;
|
||||
void* top;
|
||||
|
||||
/* We can only start allocating tables after we are done reserving space for objects at the
|
||||
* start of the workspace */
|
||||
if(ws->phase < phase) {
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
alloc = ws->tableEnd;
|
||||
end = (BYTE *)alloc + bytes;
|
||||
top = ws->allocStart;
|
||||
|
||||
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
|
||||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
|
||||
assert((bytes & (sizeof(U32)-1)) == 0);
|
||||
ZSTD_cwksp_internal_advance_phase(ws, phase);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
assert(end <= top);
|
||||
if (end > top) {
|
||||
|
|
@ -296,35 +467,41 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
|||
}
|
||||
ws->tableEnd = end;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
}
|
||||
#endif
|
||||
|
||||
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
|
||||
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
return alloc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligned on sizeof(void*).
|
||||
* Note : should happen only once, at workspace first initialization
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
|
||||
void* alloc = ws->objectEnd;
|
||||
void* end = (BYTE*)alloc + roundedBytes;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* over-reserve space */
|
||||
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
#endif
|
||||
|
||||
DEBUGLOG(5,
|
||||
DEBUGLOG(4,
|
||||
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
|
||||
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
|
||||
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
|
||||
assert((bytes & (sizeof(void*)-1)) == 0);
|
||||
assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
|
||||
assert(bytes % ZSTD_ALIGNOF(void*) == 0);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
/* we must be in the first phase, no advance is possible */
|
||||
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
|
||||
DEBUGLOG(4, "cwksp: object alloc failed!");
|
||||
DEBUGLOG(3, "cwksp: object alloc failed!");
|
||||
ws->allocFailed = 1;
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -332,27 +509,38 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
|||
ws->tableEnd = end;
|
||||
ws->tableValidEnd = end;
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
|
||||
* either size. */
|
||||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
__asan_unpoison_memory_region(alloc, bytes);
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc;
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
|
||||
{
|
||||
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
|
||||
|
||||
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the table re-use logic is sound, and that we don't
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the table reuse logic is sound, and that we don't
|
||||
* access table space that we haven't cleaned, we re-"poison" the table
|
||||
* space every time we mark it dirty. */
|
||||
* space every time we mark it dirty.
|
||||
* Since tableValidEnd space and initOnce space may overlap we don't poison
|
||||
* the initOnce portion as it break its promise. This means that this poisoning
|
||||
* check isn't always applied fully. */
|
||||
{
|
||||
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
|
||||
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
|
||||
__msan_poison(ws->objectEnd, size);
|
||||
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
|
||||
__msan_poison(ws->objectEnd, size);
|
||||
} else {
|
||||
assert(ws->initOnceStart >= ws->objectEnd);
|
||||
__msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -380,7 +568,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
|
|||
assert(ws->tableValidEnd >= ws->objectEnd);
|
||||
assert(ws->tableValidEnd <= ws->allocStart);
|
||||
if (ws->tableValidEnd < ws->tableEnd) {
|
||||
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
|
||||
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
|
||||
}
|
||||
ZSTD_cwksp_mark_tables_clean(ws);
|
||||
}
|
||||
|
|
@ -392,8 +580,12 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
|
|||
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
|
||||
DEBUGLOG(4, "cwksp: clearing tables!");
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
{
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* We don't do this when the workspace is statically allocated, because
|
||||
* when that is the case, we have no capability to hook into the end of the
|
||||
* workspace's lifecycle to unpoison the memory.
|
||||
*/
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
|
||||
__asan_poison_memory_region(ws->objectEnd, size);
|
||||
}
|
||||
|
|
@ -410,77 +602,96 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
|
|||
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
||||
DEBUGLOG(4, "cwksp: clearing!");
|
||||
|
||||
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the context re-use logic is sound, and that we don't
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
/* To validate that the context reuse logic is sound, and that we don't
|
||||
* access stuff that this compression hasn't initialized, we re-"poison"
|
||||
* the workspace (or at least the non-static, non-table parts of it)
|
||||
* every time we start a new compression. */
|
||||
* the workspace except for the areas in which we expect memory reuse
|
||||
* without initialization (objects, valid tables area and init once
|
||||
* memory). */
|
||||
{
|
||||
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
|
||||
__msan_poison(ws->tableValidEnd, size);
|
||||
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
|
||||
size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
|
||||
__msan_poison(ws->tableValidEnd, size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
{
|
||||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
|
||||
/* We don't do this when the workspace is statically allocated, because
|
||||
* when that is the case, we have no capability to hook into the end of the
|
||||
* workspace's lifecycle to unpoison the memory.
|
||||
*/
|
||||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
||||
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
|
||||
__asan_poison_memory_region(ws->objectEnd, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
ws->tableEnd = ws->objectEnd;
|
||||
ws->allocStart = ws->workspaceEnd;
|
||||
ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
|
||||
ws->allocFailed = 0;
|
||||
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
|
||||
ws->phase = ZSTD_cwksp_alloc_buffers;
|
||||
if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
|
||||
ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
|
||||
}
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
|
||||
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
|
||||
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
|
||||
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
|
||||
}
|
||||
|
||||
/**
|
||||
* The provided workspace takes ownership of the buffer [start, start+size).
|
||||
* Any existing values in the workspace are ignored (the previously managed
|
||||
* buffer, if present, must be separately freed).
|
||||
*/
|
||||
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
|
||||
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
|
||||
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
|
||||
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
|
||||
ws->workspace = start;
|
||||
ws->workspaceEnd = (BYTE*)start + size;
|
||||
ws->objectEnd = ws->workspace;
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
|
||||
ws->phase = ZSTD_cwksp_alloc_objects;
|
||||
ws->isStatic = isStatic;
|
||||
ZSTD_cwksp_clear(ws);
|
||||
ws->workspaceOversizedDuration = 0;
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
|
||||
void* workspace = ZSTD_malloc(size, customMem);
|
||||
void* workspace = ZSTD_customMalloc(size, customMem);
|
||||
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
|
||||
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
|
||||
ZSTD_cwksp_init(ws, workspace, size);
|
||||
ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
|
||||
void *ptr = ws->workspace;
|
||||
DEBUGLOG(4, "cwksp: freeing workspace");
|
||||
memset(ws, 0, sizeof(ZSTD_cwksp));
|
||||
ZSTD_free(ptr, customMem);
|
||||
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
||||
if (ptr != NULL && customMem.customFree != NULL) {
|
||||
__msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
|
||||
}
|
||||
#endif
|
||||
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
|
||||
ZSTD_customFree(ptr, customMem);
|
||||
}
|
||||
|
||||
/**
|
||||
* Moves the management of a workspace from one cwksp to another. The src cwksp
|
||||
* is left in an invalid state (src must be re-init()'ed before its used again).
|
||||
* is left in an invalid state (src must be re-init()'ed before it's used again).
|
||||
*/
|
||||
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
|
||||
*dst = *src;
|
||||
memset(src, 0, sizeof(ZSTD_cwksp));
|
||||
}
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
|
||||
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
|
||||
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
|
||||
}
|
||||
|
||||
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
|
||||
|
|
@ -491,6 +702,18 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
|
|||
* Functions Checking Free Space
|
||||
***************************************/
|
||||
|
||||
/* ZSTD_alignmentSpaceWithinBounds() :
|
||||
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
|
||||
* actual amount of space used.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
|
||||
/* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
|
||||
* the alignment bytes difference between estimation and actual usage */
|
||||
return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
|
||||
ZSTD_cwksp_used(ws) <= estimatedSpace;
|
||||
}
|
||||
|
||||
|
||||
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
|
||||
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -14,12 +14,12 @@
|
|||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
||||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "huf.h"
|
||||
#include "zstd_decompress_internal.h"
|
||||
#include "zstd_ddict.h"
|
||||
|
|
@ -127,14 +127,14 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
|||
ddict->dictContent = dict;
|
||||
if (!dict) dictSize = 0;
|
||||
} else {
|
||||
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
|
||||
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
|
||||
ddict->dictBuffer = internalBuffer;
|
||||
ddict->dictContent = internalBuffer;
|
||||
if (!internalBuffer) return ERROR(memory_allocation);
|
||||
memcpy(internalBuffer, dict, dictSize);
|
||||
ZSTD_memcpy(internalBuffer, dict, dictSize);
|
||||
}
|
||||
ddict->dictSize = dictSize;
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
|
||||
|
||||
/* parse dictionary content */
|
||||
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
|
||||
|
|
@ -147,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
|||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_customMem customMem)
|
||||
{
|
||||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
||||
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
||||
|
||||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
|
||||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
|
||||
if (ddict == NULL) return NULL;
|
||||
ddict->cMem = customMem;
|
||||
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
|
||||
|
|
@ -198,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
|
|||
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
|
||||
if (sBufferSize < neededSpace) return NULL;
|
||||
if (dictLoadMethod == ZSTD_dlm_byCopy) {
|
||||
memcpy(ddict+1, dict, dictSize); /* local copy */
|
||||
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
|
||||
dict = ddict+1;
|
||||
}
|
||||
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
|
||||
|
|
@ -213,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
|
|||
{
|
||||
if (ddict==NULL) return 0; /* support free on NULL */
|
||||
{ ZSTD_customMem const cMem = ddict->cMem;
|
||||
ZSTD_free(ddict->dictBuffer, cMem);
|
||||
ZSTD_free(ddict, cMem);
|
||||
ZSTD_customFree(ddict->dictBuffer, cMem);
|
||||
ZSTD_customFree(ddict, cMem);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -240,5 +240,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
|
|||
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
|
||||
{
|
||||
if (ddict==NULL) return 0;
|
||||
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
|
||||
return ddict->dictID;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "zstd.h" /* ZSTD_DDict, and several public functions */
|
||||
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include "zstd.h" /* DCtx, and some public functions */
|
||||
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
||||
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
||||
|
|
@ -33,6 +33,12 @@
|
|||
*/
|
||||
|
||||
|
||||
/* Streaming state is used to inform allocation of the literal buffer */
|
||||
typedef enum {
|
||||
not_streaming = 0,
|
||||
is_streaming = 1
|
||||
} streaming_operation;
|
||||
|
||||
/* ZSTD_decompressBlock_internal() :
|
||||
* decompress block, starting at `src`,
|
||||
* into destination buffer `dst`.
|
||||
|
|
@ -41,19 +47,27 @@
|
|||
*/
|
||||
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize, const int frame);
|
||||
const void* src, size_t srcSize, const streaming_operation streaming);
|
||||
|
||||
/* ZSTD_buildFSETable() :
|
||||
* generate FSE decoding table for one symbol (ll, ml or off)
|
||||
* this function must be called with valid parameters only
|
||||
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
||||
* in which case it cannot fail.
|
||||
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
||||
* defined in zstd_decompress_internal.h.
|
||||
* Internal use only.
|
||||
*/
|
||||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog);
|
||||
const U32* baseValue, const U8* nbAdditionalBits,
|
||||
unsigned tableLog, void* wksp, size_t wkspSize,
|
||||
int bmi2);
|
||||
|
||||
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
|
||||
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
|
||||
#endif /* ZSTD_DEC_BLOCK_H */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -20,33 +20,33 @@
|
|||
* Dependencies
|
||||
*********************************************************/
|
||||
#include "mem.h" /* BYTE, U16, U32 */
|
||||
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
||||
#include "zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
|
||||
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Constants
|
||||
*********************************************************/
|
||||
static const U32 LL_base[MaxLL+1] = {
|
||||
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 18, 20, 22, 24, 28, 32, 40,
|
||||
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
||||
0x2000, 0x4000, 0x8000, 0x10000 };
|
||||
|
||||
static const U32 OF_base[MaxOff+1] = {
|
||||
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
||||
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
||||
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
||||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
||||
|
||||
static const U32 OF_bits[MaxOff+1] = {
|
||||
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
static const U32 ML_base[MaxML+1] = {
|
||||
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
||||
3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18,
|
||||
19, 20, 21, 22, 23, 24, 25, 26,
|
||||
|
|
@ -73,12 +73,17 @@ static const U32 ML_base[MaxML+1] = {
|
|||
|
||||
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
||||
|
||||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
||||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
||||
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
|
||||
|
||||
typedef struct {
|
||||
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
||||
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
||||
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
|
||||
U32 rep[ZSTD_REP_NUM];
|
||||
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
||||
} ZSTD_entropyDTables_t;
|
||||
|
||||
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
||||
|
|
@ -95,10 +100,28 @@ typedef enum {
|
|||
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
||||
} ZSTD_dictUses_e;
|
||||
|
||||
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
||||
typedef struct {
|
||||
const ZSTD_DDict** ddictPtrTable;
|
||||
size_t ddictPtrTableSize;
|
||||
size_t ddictPtrCount;
|
||||
} ZSTD_DDictHashSet;
|
||||
|
||||
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
|
||||
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
|
||||
#endif
|
||||
|
||||
#define ZSTD_LBMIN 64
|
||||
#define ZSTD_LBMAX (128 << 10)
|
||||
|
||||
/* extra buffer, compensates when dst is not large enough to store litBuffer */
|
||||
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
|
||||
|
||||
typedef enum {
|
||||
ZSTD_obm_buffered = 0, /* Buffer the output */
|
||||
ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
|
||||
} ZSTD_outBufferMode_e;
|
||||
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
|
||||
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
|
||||
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
|
||||
} ZSTD_litLocation_e;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
|
|
@ -114,6 +137,7 @@ struct ZSTD_DCtx_s
|
|||
const void* dictEnd; /* end of previous segment */
|
||||
size_t expected;
|
||||
ZSTD_frameHeader fParams;
|
||||
U64 processedCSize;
|
||||
U64 decodedSize;
|
||||
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
||||
ZSTD_dStage stage;
|
||||
|
|
@ -122,12 +146,17 @@ struct ZSTD_DCtx_s
|
|||
XXH64_state_t xxhState;
|
||||
size_t headerSize;
|
||||
ZSTD_format_e format;
|
||||
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
||||
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
||||
const BYTE* litPtr;
|
||||
ZSTD_customMem customMem;
|
||||
size_t litSize;
|
||||
size_t rleSize;
|
||||
size_t staticSize;
|
||||
int isFrameDecompression;
|
||||
#if DYNAMIC_BMI2 != 0
|
||||
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
||||
#endif
|
||||
|
||||
/* dictionary */
|
||||
ZSTD_DDict* ddictLocal;
|
||||
|
|
@ -135,6 +164,10 @@ struct ZSTD_DCtx_s
|
|||
U32 dictID;
|
||||
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
||||
ZSTD_dictUses_e dictUses;
|
||||
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
||||
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
||||
int disableHufAsm;
|
||||
int maxBlockSizeParam;
|
||||
|
||||
/* streaming */
|
||||
ZSTD_dStreamStage streamStage;
|
||||
|
|
@ -147,16 +180,21 @@ struct ZSTD_DCtx_s
|
|||
size_t outStart;
|
||||
size_t outEnd;
|
||||
size_t lhSize;
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
void* legacyContext;
|
||||
U32 previousLegacyVersion;
|
||||
U32 legacyVersion;
|
||||
#endif
|
||||
U32 hostageByte;
|
||||
int noForwardProgress;
|
||||
ZSTD_outBufferMode_e outBufferMode;
|
||||
ZSTD_bufferMode_e outBufferMode;
|
||||
ZSTD_outBuffer expectedOutBuffer;
|
||||
|
||||
/* workspace */
|
||||
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
||||
BYTE* litBuffer;
|
||||
const BYTE* litBufferEnd;
|
||||
ZSTD_litLocation_e litBufferLocation;
|
||||
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
|
||||
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
||||
|
||||
size_t oversizedDuration;
|
||||
|
|
@ -165,8 +203,21 @@ struct ZSTD_DCtx_s
|
|||
void const* dictContentBeginForFuzzing;
|
||||
void const* dictContentEndForFuzzing;
|
||||
#endif
|
||||
|
||||
/* Tracing */
|
||||
#if ZSTD_TRACE
|
||||
ZSTD_TraceCtx traceCtx;
|
||||
#endif
|
||||
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
||||
|
||||
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
|
||||
#if DYNAMIC_BMI2 != 0
|
||||
return dctx->bmi2;
|
||||
#else
|
||||
(void)dctx;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*-*******************************************************
|
||||
* Shared internal functions
|
||||
|
|
@ -183,7 +234,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|||
* If yes, do nothing (continue on current segment).
|
||||
* If not, classify previous segment as "external dictionary", and start a new segment.
|
||||
* This function cannot fail. */
|
||||
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
||||
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
||||
|
||||
|
||||
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
||||
|
|
|
|||
111
uppsrc/plugin/zstd/lib/zstd_deps.h
Normal file
111
uppsrc/plugin/zstd/lib/zstd_deps.h
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* This file provides common libc dependencies that zstd requires.
|
||||
* The purpose is to allow replacing this file with a custom implementation
|
||||
* to compile zstd without libc support.
|
||||
*/
|
||||
|
||||
/* Need:
|
||||
* NULL
|
||||
* INT_MAX
|
||||
* UINT_MAX
|
||||
* ZSTD_memcpy()
|
||||
* ZSTD_memset()
|
||||
* ZSTD_memmove()
|
||||
*/
|
||||
#ifndef ZSTD_DEPS_COMMON
|
||||
#define ZSTD_DEPS_COMMON
|
||||
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
|
||||
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
|
||||
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
|
||||
#else
|
||||
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
|
||||
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
|
||||
# define ZSTD_memset(p,v,l) memset((p),(v),(l))
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_DEPS_COMMON */
|
||||
|
||||
/* Need:
|
||||
* ZSTD_malloc()
|
||||
* ZSTD_free()
|
||||
* ZSTD_calloc()
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_MALLOC
|
||||
#ifndef ZSTD_DEPS_MALLOC
|
||||
#define ZSTD_DEPS_MALLOC
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ZSTD_malloc(s) malloc(s)
|
||||
#define ZSTD_calloc(n,s) calloc((n), (s))
|
||||
#define ZSTD_free(p) free((p))
|
||||
|
||||
#endif /* ZSTD_DEPS_MALLOC */
|
||||
#endif /* ZSTD_DEPS_NEED_MALLOC */
|
||||
|
||||
/*
|
||||
* Provides 64-bit math support.
|
||||
* Need:
|
||||
* U64 ZSTD_div64(U64 dividend, U32 divisor)
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_MATH64
|
||||
#ifndef ZSTD_DEPS_MATH64
|
||||
#define ZSTD_DEPS_MATH64
|
||||
|
||||
#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
|
||||
|
||||
#endif /* ZSTD_DEPS_MATH64 */
|
||||
#endif /* ZSTD_DEPS_NEED_MATH64 */
|
||||
|
||||
/* Need:
|
||||
* assert()
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_ASSERT
|
||||
#ifndef ZSTD_DEPS_ASSERT
|
||||
#define ZSTD_DEPS_ASSERT
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#endif /* ZSTD_DEPS_ASSERT */
|
||||
#endif /* ZSTD_DEPS_NEED_ASSERT */
|
||||
|
||||
/* Need:
|
||||
* ZSTD_DEBUG_PRINT()
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_IO
|
||||
#ifndef ZSTD_DEPS_IO
|
||||
#define ZSTD_DEPS_IO
|
||||
|
||||
#include <stdio.h>
|
||||
#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
|
||||
|
||||
#endif /* ZSTD_DEPS_IO */
|
||||
#endif /* ZSTD_DEPS_NEED_IO */
|
||||
|
||||
/* Only requested when <stdint.h> is known to be present.
|
||||
* Need:
|
||||
* intptr_t
|
||||
*/
|
||||
#ifdef ZSTD_DEPS_NEED_STDINT
|
||||
#ifndef ZSTD_DEPS_STDINT
|
||||
#define ZSTD_DEPS_STDINT
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#endif /* ZSTD_DEPS_STDINT */
|
||||
#endif /* ZSTD_DEPS_NEED_STDINT */
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -11,8 +11,49 @@
|
|||
#include "zstd_compress_internal.h"
|
||||
#include "zstd_double_fast.h"
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
|
||||
|
||||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
static
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashLarge = ms->hashTable;
|
||||
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
U32 const mls = cParams->minMatch;
|
||||
U32* const hashSmall = ms->chainTable;
|
||||
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* ip = base + ms->nextToUpdate;
|
||||
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
||||
const U32 fastHashFillStep = 3;
|
||||
|
||||
/* Always insert every fastHashFillStep position into the hash tables.
|
||||
* Insert the other positions into the large hash table if their entry
|
||||
* is empty.
|
||||
*/
|
||||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 i;
|
||||
for (i = 0; i < fastHashFillStep; ++i) {
|
||||
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
||||
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
||||
if (i == 0) {
|
||||
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
|
||||
}
|
||||
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
|
||||
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
|
||||
}
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
if (dtlm == ZSTD_dtlm_fast)
|
||||
break;
|
||||
} }
|
||||
}
|
||||
|
||||
static
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
|
|
@ -31,27 +72,251 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|||
* is empty.
|
||||
*/
|
||||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 i;
|
||||
for (i = 0; i < fastHashFillStep; ++i) {
|
||||
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
||||
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
||||
if (i == 0)
|
||||
hashSmall[smHash] = current + i;
|
||||
hashSmall[smHash] = curr + i;
|
||||
if (i == 0 || hashLarge[lgHash] == 0)
|
||||
hashLarge[lgHash] = current + i;
|
||||
hashLarge[lgHash] = curr + i;
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
if (dtlm == ZSTD_dtlm_fast)
|
||||
break;
|
||||
} }
|
||||
} }
|
||||
}
|
||||
|
||||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
const void* const end,
|
||||
ZSTD_dictTableLoadMethod_e dtlm,
|
||||
ZSTD_tableFillPurpose_e tfp)
|
||||
{
|
||||
if (tfp == ZSTD_tfp_forCDict) {
|
||||
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
|
||||
} else {
|
||||
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize, U32 const mls /* template */)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32* const hashLong = ms->hashTable;
|
||||
const U32 hBitsL = cParams->hashLog;
|
||||
U32* const hashSmall = ms->chainTable;
|
||||
const U32 hBitsS = cParams->chainLog;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
||||
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
||||
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
||||
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
||||
|
||||
size_t mLength;
|
||||
U32 offset;
|
||||
U32 curr;
|
||||
|
||||
/* how many positions to search before increasing step size */
|
||||
const size_t kStepIncr = 1 << kSearchStrength;
|
||||
/* the position at which to increment the step size if no match is found */
|
||||
const BYTE* nextStep;
|
||||
size_t step; /* the current step size */
|
||||
|
||||
size_t hl0; /* the long hash at ip */
|
||||
size_t hl1; /* the long hash at ip1 */
|
||||
|
||||
U32 idxl0; /* the long match index for ip */
|
||||
U32 idxl1; /* the long match index for ip1 */
|
||||
|
||||
const BYTE* matchl0; /* the long match for ip */
|
||||
const BYTE* matchs0; /* the short match for ip */
|
||||
const BYTE* matchl1; /* the long match for ip1 */
|
||||
|
||||
const BYTE* ip = istart; /* the current position */
|
||||
const BYTE* ip1; /* the next position */
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
|
||||
|
||||
/* init */
|
||||
ip += ((ip - prefixLowest) == 0);
|
||||
{
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Outer Loop: one iteration per match found and stored */
|
||||
while (1) {
|
||||
step = 1;
|
||||
nextStep = ip + kStepIncr;
|
||||
ip1 = ip + step;
|
||||
|
||||
if (ip1 > ilimit) {
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
|
||||
idxl0 = hashLong[hl0];
|
||||
matchl0 = base + idxl0;
|
||||
|
||||
/* Inner Loop: one iteration per search / position */
|
||||
do {
|
||||
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
const U32 idxs0 = hashSmall[hs0];
|
||||
curr = (U32)(ip-base);
|
||||
matchs0 = base + idxs0;
|
||||
|
||||
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
|
||||
|
||||
/* check noDict repcode */
|
||||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
|
||||
|
||||
if (idxl0 > prefixLowestIndex) {
|
||||
/* check prefix long match */
|
||||
if (MEM_read64(matchl0) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
|
||||
offset = (U32)(ip-matchl0);
|
||||
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
}
|
||||
|
||||
idxl1 = hashLong[hl1];
|
||||
matchl1 = base + idxl1;
|
||||
|
||||
if (idxs0 > prefixLowestIndex) {
|
||||
/* check prefix short match */
|
||||
if (MEM_read32(matchs0) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
}
|
||||
}
|
||||
|
||||
if (ip1 >= nextStep) {
|
||||
PREFETCH_L1(ip1 + 64);
|
||||
PREFETCH_L1(ip1 + 128);
|
||||
step++;
|
||||
nextStep += kStepIncr;
|
||||
}
|
||||
ip = ip1;
|
||||
ip1 += step;
|
||||
|
||||
hl0 = hl1;
|
||||
idxl0 = idxl1;
|
||||
matchl0 = matchl1;
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip+256);
|
||||
#endif
|
||||
} while (ip1 <= ilimit);
|
||||
|
||||
_cleanup:
|
||||
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
||||
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
||||
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
||||
|
||||
/* Return the last literals size */
|
||||
return (size_t)(iend - anchor);
|
||||
|
||||
_search_next_long:
|
||||
|
||||
/* check prefix long +1 match */
|
||||
if (idxl1 > prefixLowestIndex) {
|
||||
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
|
||||
ip = ip1;
|
||||
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
|
||||
offset = (U32)(ip-matchl1);
|
||||
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
}
|
||||
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
|
||||
offset = (U32)(ip - matchs0);
|
||||
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
|
||||
|
||||
/* fall-through */
|
||||
|
||||
_match_found: /* requires ip, offset, mLength */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
|
||||
if (step < 4) {
|
||||
/* It is unsafe to write this value back to the hashtable when ip1 is
|
||||
* greater than or equal to the new ip we will have after we're done
|
||||
* processing this match. Rather than perform that test directly
|
||||
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
|
||||
* more predictable test. The minmatch even if we take a short match is
|
||||
* 4 bytes, so as long as step, the distance between ip and ip1
|
||||
* (initially) is less than 4, we know ip1 < new ip. */
|
||||
hashLong[hl1] = (U32)(ip1 - base);
|
||||
}
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
_match_stored:
|
||||
/* match found */
|
||||
ip += mLength;
|
||||
anchor = ip;
|
||||
|
||||
if (ip <= ilimit) {
|
||||
/* Complementary insertion */
|
||||
/* done after iLimit test, as candidates could be > iend-8 */
|
||||
{ U32 const indexToInsert = curr+2;
|
||||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
||||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
||||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
||||
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
|
||||
ip += rLength;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
|
||||
U32 const mls /* template */)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32* const hashLong = ms->hashTable;
|
||||
|
|
@ -69,57 +334,39 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const ZSTD_compressionParameters* const dictCParams =
|
||||
dictMode == ZSTD_dictMatchState ?
|
||||
&dms->cParams : NULL;
|
||||
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
|
||||
dms->hashTable : NULL;
|
||||
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
|
||||
dms->chainTable : NULL;
|
||||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
||||
dictBase + dictStartIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->hashLog : hBitsL;
|
||||
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->chainLog : hBitsS;
|
||||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
|
||||
const U32* const dictHashLong = dms->hashTable;
|
||||
const U32* const dictHashSmall = dms->chainTable;
|
||||
const U32 dictStartIndex = dms->window.dictLimit;
|
||||
const BYTE* const dictBase = dms->window.base;
|
||||
const BYTE* const dictStart = dictBase + dictStartIndex;
|
||||
const BYTE* const dictEnd = dms->window.nextSrc;
|
||||
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
|
||||
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
||||
|
||||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
|
||||
|
||||
/* if a dictionary is attached, it must be within window range */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||
|
||||
if (ms->prefetchCDictTables) {
|
||||
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
||||
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
|
||||
PREFETCH_AREA(dictHashLong, hashTableBytes);
|
||||
PREFETCH_AREA(dictHashSmall, chainTableBytes);
|
||||
}
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
}
|
||||
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
|
|
@ -127,37 +374,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|||
U32 offset;
|
||||
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
||||
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
||||
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
||||
U32 const current = (U32)(ip-base);
|
||||
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
||||
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
||||
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
|
||||
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
|
||||
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
|
||||
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
|
||||
U32 const curr = (U32)(ip-base);
|
||||
U32 const matchIndexL = hashLong[h2];
|
||||
U32 matchIndexS = hashSmall[h];
|
||||
const BYTE* matchLong = base + matchIndexL;
|
||||
const BYTE* match = base + matchIndexS;
|
||||
const U32 repIndex = current + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixLowestIndex) ?
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
|
||||
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
||||
|
||||
/* check dictMatchState repcode */
|
||||
if (dictMode == ZSTD_dictMatchState
|
||||
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||
/* check repcode */
|
||||
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
/* check noDict repcode */
|
||||
if ( dictMode == ZSTD_noDict
|
||||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
|
|
@ -169,15 +409,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
} else if (dictTagsMatchL) {
|
||||
/* check dictMatchState long match */
|
||||
U32 const dictMatchIndexL = dictHashLong[dictHL];
|
||||
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
||||
assert(dictMatchL < dictEnd);
|
||||
|
||||
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
||||
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
|
||||
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} }
|
||||
|
|
@ -187,9 +427,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
} else if (dictTagsMatchS) {
|
||||
/* check dictMatchState short match */
|
||||
U32 const dictMatchIndexS = dictHashSmall[dictHS];
|
||||
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
match = dictBase + dictMatchIndexS;
|
||||
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
||||
|
||||
|
|
@ -204,12 +444,13 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|||
continue;
|
||||
|
||||
_search_next_long:
|
||||
|
||||
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
||||
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
||||
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
||||
U32 const matchIndexL3 = hashLong[hl3];
|
||||
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
||||
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
|
||||
const BYTE* matchL3 = base + matchIndexL3;
|
||||
hashLong[hl3] = current + 1;
|
||||
hashLong[hl3] = curr + 1;
|
||||
|
||||
/* check prefix long +1 match */
|
||||
if (matchIndexL3 > prefixLowestIndex) {
|
||||
|
|
@ -220,23 +461,23 @@ _search_next_long:
|
|||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
} else if (dictTagsMatchL3) {
|
||||
/* check dict long +1 match */
|
||||
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
|
||||
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
||||
assert(dictMatchL3 < dictEnd);
|
||||
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
||||
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
||||
ip++;
|
||||
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
|
||||
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
||||
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
} } }
|
||||
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
||||
if (matchIndexS < prefixLowestIndex) {
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
||||
offset = (U32)(current - matchIndexS);
|
||||
offset = (U32)(curr - matchIndexS);
|
||||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
} else {
|
||||
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
||||
|
|
@ -244,13 +485,11 @@ _search_next_long:
|
|||
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
}
|
||||
|
||||
/* fall-through */
|
||||
|
||||
_match_found:
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
_match_stored:
|
||||
/* match found */
|
||||
|
|
@ -260,7 +499,7 @@ _match_stored:
|
|||
if (ip <= ilimit) {
|
||||
/* Complementary insertion */
|
||||
/* done after iLimit test, as candidates could be > iend-8 */
|
||||
{ U32 const indexToInsert = current+2;
|
||||
{ U32 const indexToInsert = curr+2;
|
||||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
||||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
||||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
||||
|
|
@ -268,53 +507,55 @@ _match_stored:
|
|||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
||||
&& repIndex2 < prefixLowestIndex ?
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
} }
|
||||
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
||||
ip += rLength;
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
} } }
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* while (ip < ilimit) */
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
rep[0] = offset_1;
|
||||
rep[1] = offset_2;
|
||||
|
||||
/* Return the last literals size */
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
|
||||
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
||||
void const* src, size_t srcSize) \
|
||||
{ \
|
||||
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
|
||||
}
|
||||
|
||||
ZSTD_GEN_DFAST_FN(noDict, 4)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 5)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 6)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 7)
|
||||
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
|
|
@ -325,13 +566,13 @@ size_t ZSTD_compressBlock_doubleFast(
|
|||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -345,18 +586,20 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
|||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
static
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls /* template */)
|
||||
|
|
@ -387,7 +630,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|||
|
||||
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
|
||||
if (prefixStartIndex == dictStartIndex)
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
|
||||
|
||||
/* Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
||||
|
|
@ -401,31 +644,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|||
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
||||
|
||||
const U32 current = (U32)(ip-base);
|
||||
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
|
||||
const U32 curr = (U32)(ip-base);
|
||||
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
|
||||
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
size_t mLength;
|
||||
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
|
||||
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
||||
|
||||
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
||||
& (repIndex > dictStartIndex))
|
||||
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
||||
} else {
|
||||
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
||||
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
||||
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
||||
U32 offset;
|
||||
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
||||
offset = current - matchLongIndex;
|
||||
offset = curr - matchLongIndex;
|
||||
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
||||
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
||||
|
|
@ -433,24 +676,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|||
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
||||
const BYTE* match3 = match3Base + matchIndex3;
|
||||
U32 offset;
|
||||
hashLong[h3] = current + 1;
|
||||
hashLong[h3] = curr + 1;
|
||||
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
||||
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
||||
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
||||
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
||||
ip++;
|
||||
offset = current+1 - matchIndex3;
|
||||
offset = curr+1 - matchIndex3;
|
||||
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
||||
} else {
|
||||
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
||||
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
||||
offset = current - matchIndex;
|
||||
offset = curr - matchIndex;
|
||||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
}
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
||||
|
||||
} else {
|
||||
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
||||
|
|
@ -464,7 +707,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|||
if (ip <= ilimit) {
|
||||
/* Complementary insertion */
|
||||
/* done after iLimit test, as candidates could be > iend-8 */
|
||||
{ U32 const indexToInsert = current+2;
|
||||
{ U32 const indexToInsert = curr+2;
|
||||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
||||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
||||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
||||
|
|
@ -477,12 +720,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
||||
& (repIndex2 > dictStartIndex))
|
||||
& (offset_2 <= current2 - dictStartIndex))
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
|
|
@ -500,6 +743,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
ZSTD_GEN_DFAST_FN(extDict, 4)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 5)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 6)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 7)
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
|
|
@ -510,12 +757,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
|||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -18,8 +18,12 @@ extern "C" {
|
|||
#include "mem.h" /* U32 */
|
||||
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
|
||||
|
||||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
|
||||
ZSTD_tableFillPurpose_e tfp);
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
|
@ -30,6 +34,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
|||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
|
||||
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
|
||||
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
|
||||
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -20,19 +20,31 @@ extern "C" {
|
|||
|
||||
|
||||
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
|
||||
#ifndef ZSTDERRORLIB_VISIBILITY
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
||||
#ifndef ZSTDERRORLIB_VISIBLE
|
||||
/* Backwards compatibility with old macro name */
|
||||
# ifdef ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
|
||||
# else
|
||||
# define ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_VISIBLE
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ZSTDERRORLIB_HIDDEN
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
||||
# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
|
||||
# else
|
||||
# define ZSTDERRORLIB_HIDDEN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
||||
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
|
||||
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
||||
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
#else
|
||||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
|
||||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
|
||||
#endif
|
||||
|
||||
/*-*********************************************
|
||||
|
|
@ -58,14 +70,17 @@ typedef enum {
|
|||
ZSTD_error_frameParameter_windowTooLarge = 16,
|
||||
ZSTD_error_corruption_detected = 20,
|
||||
ZSTD_error_checksum_wrong = 22,
|
||||
ZSTD_error_literals_headerWrong = 24,
|
||||
ZSTD_error_dictionary_corrupted = 30,
|
||||
ZSTD_error_dictionary_wrong = 32,
|
||||
ZSTD_error_dictionaryCreation_failed = 34,
|
||||
ZSTD_error_parameter_unsupported = 40,
|
||||
ZSTD_error_parameter_combination_unsupported = 41,
|
||||
ZSTD_error_parameter_outOfBound = 42,
|
||||
ZSTD_error_tableLog_tooLarge = 44,
|
||||
ZSTD_error_maxSymbolValue_tooLarge = 46,
|
||||
ZSTD_error_maxSymbolValue_tooSmall = 48,
|
||||
ZSTD_error_stabilityCondition_notRespected = 50,
|
||||
ZSTD_error_stage_wrong = 60,
|
||||
ZSTD_error_init_missing = 62,
|
||||
ZSTD_error_memory_allocation = 64,
|
||||
|
|
@ -73,10 +88,15 @@ typedef enum {
|
|||
ZSTD_error_dstSize_tooSmall = 70,
|
||||
ZSTD_error_srcSize_wrong = 72,
|
||||
ZSTD_error_dstBuffer_null = 74,
|
||||
ZSTD_error_noForwardProgress_destFull = 80,
|
||||
ZSTD_error_noForwardProgress_inputEmpty = 82,
|
||||
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
|
||||
ZSTD_error_frameIndex_tooLarge = 100,
|
||||
ZSTD_error_seekableIO = 102,
|
||||
ZSTD_error_dstBuffer_wrong = 104,
|
||||
ZSTD_error_srcBuffer_wrong = 105,
|
||||
ZSTD_error_sequenceProducer_failed = 106,
|
||||
ZSTD_error_externalSequences_invalid = 107,
|
||||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
|
||||
} ZSTD_ErrorCode;
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -19,7 +19,8 @@ extern "C" {
|
|||
#include "zstd_compress_internal.h"
|
||||
|
||||
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
|
||||
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
|
||||
ZSTD_tableFillPurpose_e tfp);
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -19,10 +19,8 @@
|
|||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#ifdef __aarch64__
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "compiler.h"
|
||||
#include "cpu.h"
|
||||
#include "mem.h"
|
||||
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
||||
#include "error_private.h"
|
||||
|
|
@ -30,12 +28,16 @@
|
|||
#include "zstd.h"
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "huf.h"
|
||||
#ifndef XXH_STATIC_LINKING_ONLY
|
||||
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||
#endif
|
||||
#include "xxhash.h" /* XXH_reset, update, digest */
|
||||
#ifndef ZSTD_NO_TRACE
|
||||
# include "zstd_trace.h"
|
||||
#else
|
||||
# define ZSTD_TRACE 0
|
||||
#endif
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
|
|
@ -55,81 +57,7 @@ extern "C" {
|
|||
#undef MAX
|
||||
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* This is a helper function to help force C99-correctness during compilation.
|
||||
* Under strict compilation modes, variadic macro arguments can't be empty.
|
||||
* However, variadic function arguments can be. Using a function therefore lets
|
||||
* us statically check that at least one (string) argument was passed,
|
||||
* independent of the compilation flags.
|
||||
*/
|
||||
static INLINE_KEYWORD UNUSED_ATTR
|
||||
void _force_has_format_string(const char *format, ...) {
|
||||
(void)format;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* We want to force this function invocation to be syntactically correct, but
|
||||
* we don't want to force runtime evaluation of its arguments.
|
||||
*/
|
||||
#define _FORCE_HAS_FORMAT_STRING(...) \
|
||||
if (0) { \
|
||||
_force_has_format_string(__VA_ARGS__); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
* In order to do that (particularly, printing the conditional that failed),
|
||||
* this can't just wrap RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0);
|
||||
|
||||
/**
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0);
|
||||
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
|
||||
|
||||
|
||||
/*-*************************************
|
||||
|
|
@ -138,8 +66,7 @@ void _force_has_format_string(const char *format, ...) {
|
|||
#define ZSTD_OPT_NUM (1<<12)
|
||||
|
||||
#define ZSTD_REP_NUM 3 /* number of repcodes */
|
||||
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
|
||||
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
|
|
@ -153,21 +80,21 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
|||
#define BIT0 1
|
||||
|
||||
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
|
||||
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
||||
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
||||
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
||||
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
||||
|
||||
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
|
||||
|
||||
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
|
||||
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
||||
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
||||
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
|
||||
|
||||
#define ZSTD_FRAMECHECKSUMSIZE 4
|
||||
|
||||
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
|
||||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
|
||||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
|
||||
#define MIN_LITERALS_FOR_4_STREAMS 6
|
||||
|
||||
#define HufLog 12
|
||||
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
|
||||
|
||||
#define LONGNBSEQ 0x7F00
|
||||
|
|
@ -175,6 +102,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
|||
#define MINMATCH 3
|
||||
|
||||
#define Litbits 8
|
||||
#define LitHufLog 11
|
||||
#define MaxLit ((1<<Litbits) - 1)
|
||||
#define MaxML 52
|
||||
#define MaxLL 35
|
||||
|
|
@ -185,65 +113,92 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
|||
#define LLFSELog 9
|
||||
#define OffFSELog 8
|
||||
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
|
||||
#define MaxMLBits 16
|
||||
#define MaxLLBits 16
|
||||
|
||||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16 };
|
||||
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 3, 2, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1 };
|
||||
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
|
||||
/* Each table cannot take more than #symbols * FSELog bits */
|
||||
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
|
||||
|
||||
static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 6, 7, 8, 9,10,11,12,
|
||||
13,14,15,16
|
||||
};
|
||||
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
|
||||
4, 3, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 3, 2, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1
|
||||
};
|
||||
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
||||
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
||||
|
||||
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 7, 8, 9,10,11,
|
||||
12,13,14,15,16 };
|
||||
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1,-1,-1,
|
||||
-1,-1,-1,-1,-1 };
|
||||
static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 7, 8, 9,10,11,
|
||||
12,13,14,15,16
|
||||
};
|
||||
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
|
||||
1, 4, 3, 2, 2, 2, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1,-1,-1,
|
||||
-1,-1,-1,-1,-1
|
||||
};
|
||||
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
|
||||
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
|
||||
|
||||
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1,-1 };
|
||||
static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
|
||||
1, 1, 1, 1, 1, 1, 2, 2,
|
||||
2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
-1,-1,-1,-1,-1
|
||||
};
|
||||
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
|
||||
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
|
||||
|
||||
/*-*******************************************
|
||||
* Shared functions to include for inlining
|
||||
*********************************************/
|
||||
static void ZSTD_copy8(void* dst, const void* src) {
|
||||
#ifdef __aarch64__
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
|
||||
#else
|
||||
memcpy(dst, src, 8);
|
||||
ZSTD_memcpy(dst, src, 8);
|
||||
#endif
|
||||
}
|
||||
#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
|
||||
|
||||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
||||
/* Need to use memmove here since the literal buffer can now be located within
|
||||
the dst buffer. In circumstances where the op "catches up" to where the
|
||||
literal buffer is, there can be partial overlaps in this call on the final
|
||||
copy if the literal is being shifted by less than 16 bytes. */
|
||||
static void ZSTD_copy16(void* dst, const void* src) {
|
||||
#ifdef __aarch64__
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
||||
#elif defined(ZSTD_ARCH_X86_SSE2)
|
||||
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
|
||||
#elif defined(__clang__)
|
||||
ZSTD_memmove(dst, src, 16);
|
||||
#else
|
||||
memcpy(dst, src, 16);
|
||||
/* ZSTD_memmove is not inlined properly by gcc */
|
||||
BYTE copy16_buf[16];
|
||||
ZSTD_memcpy(copy16_buf, src, 16);
|
||||
ZSTD_memcpy(dst, copy16_buf, 16);
|
||||
#endif
|
||||
}
|
||||
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
|
||||
#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
|
||||
|
||||
#define WILDCOPY_OVERLENGTH 32
|
||||
#define WILDCOPY_VECLEN 16
|
||||
|
|
@ -255,13 +210,13 @@ typedef enum {
|
|||
} ZSTD_overlap_e;
|
||||
|
||||
/*! ZSTD_wildcopy() :
|
||||
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
|
||||
* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
|
||||
* @param ovtype controls the overlap detection
|
||||
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
|
||||
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
|
||||
* The src buffer must be before the dst buffer.
|
||||
*/
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
MEM_STATIC FORCE_INLINE_ATTR
|
||||
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
|
||||
{
|
||||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
||||
|
|
@ -269,12 +224,10 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
|||
BYTE* op = (BYTE*)dst;
|
||||
BYTE* const oend = op + length;
|
||||
|
||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
|
||||
|
||||
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
|
||||
/* Handle short offset copies. */
|
||||
do {
|
||||
COPY8(op, ip)
|
||||
COPY8(op, ip);
|
||||
} while (op < oend);
|
||||
} else {
|
||||
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
|
||||
|
|
@ -284,20 +237,15 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
|||
* one COPY16() in the first call. Then, do two calls per loop since
|
||||
* at that point it is more likely to have a high trip count.
|
||||
*/
|
||||
#ifndef __aarch64__
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#else
|
||||
COPY16(op, ip);
|
||||
if (op >= oend) return;
|
||||
ZSTD_copy16(op, ip);
|
||||
if (16 >= length) return;
|
||||
op += 16;
|
||||
ip += 16;
|
||||
do {
|
||||
COPY16(op, ip);
|
||||
COPY16(op, ip);
|
||||
}
|
||||
while (op < oend);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -305,7 +253,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
|
|||
{
|
||||
size_t const length = MIN(dstCapacity, srcSize);
|
||||
if (length > 0) {
|
||||
memcpy(dst, src, length);
|
||||
ZSTD_memcpy(dst, src, length);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
|
@ -320,28 +268,46 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
|
|||
* In which case, resize it down to free some memory */
|
||||
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
|
||||
|
||||
/* Controls whether the input/output buffer is buffered or stable. */
|
||||
typedef enum {
|
||||
ZSTD_bm_buffered = 0, /* Buffer the input/output */
|
||||
ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
|
||||
} ZSTD_bufferMode_e;
|
||||
|
||||
|
||||
/*-*******************************************
|
||||
* Private declarations
|
||||
*********************************************/
|
||||
typedef struct seqDef_s {
|
||||
U32 offset;
|
||||
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
|
||||
U16 litLength;
|
||||
U16 matchLength;
|
||||
U16 mlBase; /* mlBase == matchLength - MINMATCH */
|
||||
} seqDef;
|
||||
|
||||
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
|
||||
typedef enum {
|
||||
ZSTD_llt_none = 0, /* no longLengthType */
|
||||
ZSTD_llt_literalLength = 1, /* represents a long literal */
|
||||
ZSTD_llt_matchLength = 2 /* represents a long match */
|
||||
} ZSTD_longLengthType_e;
|
||||
|
||||
typedef struct {
|
||||
seqDef* sequencesStart;
|
||||
seqDef* sequences;
|
||||
BYTE* litStart;
|
||||
BYTE* lit;
|
||||
BYTE* llCode;
|
||||
BYTE* mlCode;
|
||||
BYTE* ofCode;
|
||||
seqDef* sequences; /* ptr to end of sequences */
|
||||
BYTE* litStart;
|
||||
BYTE* lit; /* ptr to end of literals */
|
||||
BYTE* llCode;
|
||||
BYTE* mlCode;
|
||||
BYTE* ofCode;
|
||||
size_t maxNbSeq;
|
||||
size_t maxNbLit;
|
||||
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
||||
U32 longLengthPos;
|
||||
|
||||
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
|
||||
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
|
||||
* the existing value of the litLength or matchLength by 0x10000.
|
||||
*/
|
||||
ZSTD_longLengthType_e longLengthType;
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
} seqStore_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -351,19 +317,19 @@ typedef struct {
|
|||
|
||||
/**
|
||||
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
|
||||
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
|
||||
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
|
||||
*/
|
||||
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
|
||||
{
|
||||
ZSTD_sequenceLength seqLen;
|
||||
seqLen.litLength = seq->litLength;
|
||||
seqLen.matchLength = seq->matchLength + MINMATCH;
|
||||
seqLen.matchLength = seq->mlBase + MINMATCH;
|
||||
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
|
||||
if (seqStore->longLengthID == 1) {
|
||||
seqLen.litLength += 0xFFFF;
|
||||
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
|
||||
seqLen.litLength += 0x10000;
|
||||
}
|
||||
if (seqStore->longLengthID == 2) {
|
||||
seqLen.matchLength += 0xFFFF;
|
||||
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
|
||||
seqLen.matchLength += 0x10000;
|
||||
}
|
||||
}
|
||||
return seqLen;
|
||||
|
|
@ -376,42 +342,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
|
|||
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
|
||||
*/
|
||||
typedef struct {
|
||||
size_t nbBlocks;
|
||||
size_t compressedSize;
|
||||
unsigned long long decompressedBound;
|
||||
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
||||
|
||||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
/* custom memory allocation functions */
|
||||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
|
||||
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
|
||||
void ZSTD_free(void* ptr, ZSTD_customMem customMem);
|
||||
|
||||
|
||||
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
|
||||
{
|
||||
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return __builtin_clz (val) ^ 31;
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return 31 - __CLZ(val);
|
||||
# else /* Software version */
|
||||
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
||||
U32 v = val;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
|
||||
# endif
|
||||
}
|
||||
}
|
||||
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||
|
||||
|
||||
/* ZSTD_invalidateRepCodes() :
|
||||
|
|
@ -429,16 +366,24 @@ typedef struct {
|
|||
|
||||
/*! ZSTD_getcBlockSize() :
|
||||
* Provides the size of compressed block from block header `src` */
|
||||
/* Used by: decompress, fullbench (does not get its definition from here) */
|
||||
/* Used by: decompress, fullbench */
|
||||
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
blockProperties_t* bpPtr);
|
||||
|
||||
/*! ZSTD_decodeSeqHeaders() :
|
||||
* decode sequence header from src */
|
||||
/* Used by: decompress, fullbench (does not get its definition from here) */
|
||||
/* Used by: zstd_decompress_block, fullbench */
|
||||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/**
|
||||
* @returns true iff the CPU supports dynamic BMI2 dispatch.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
|
||||
{
|
||||
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
|
||||
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
|
||||
}
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -17,49 +17,184 @@ extern "C" {
|
|||
|
||||
#include "zstd_compress_internal.h"
|
||||
|
||||
/**
|
||||
* Dedicated Dictionary Search Structure bucket log. In the
|
||||
* ZSTD_dedicatedDictSearch mode, the hashTable has
|
||||
* 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
|
||||
* one.
|
||||
*/
|
||||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
||||
|
||||
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
|
||||
|
||||
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|
||||
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|
||||
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|
||||
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
|
||||
|
||||
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
|
||||
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
#endif
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
|
||||
size_t ZSTD_compressBlock_greedy(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dictMatchState(
|
||||
size_t ZSTD_compressBlock_greedy_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_extDict_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
|
||||
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
|
||||
size_t ZSTD_compressBlock_lazy(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_extDict_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
|
||||
size_t ZSTD_compressBlock_lazy2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_extDict_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
|
||||
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
|
||||
size_t ZSTD_compressBlock_btlazy2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btlazy2_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
|
||||
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
|
||||
#endif
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -11,13 +11,126 @@
|
|||
#include "zstd_ldm.h"
|
||||
|
||||
#include "debug.h"
|
||||
#include "xxhash.h"
|
||||
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
|
||||
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
|
||||
#include "zstd_ldm_geartab.h"
|
||||
|
||||
#define LDM_BUCKET_SIZE_LOG 3
|
||||
#define LDM_MIN_MATCH_LENGTH 64
|
||||
#define LDM_HASH_RLOG 7
|
||||
#define LDM_HASH_CHAR_OFFSET 10
|
||||
|
||||
typedef struct {
|
||||
U64 rolling;
|
||||
U64 stopMask;
|
||||
} ldmRollingHashState_t;
|
||||
|
||||
/** ZSTD_ldm_gear_init():
|
||||
*
|
||||
* Initializes the rolling hash state such that it will honor the
|
||||
* settings in params. */
|
||||
static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
|
||||
{
|
||||
unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
|
||||
unsigned hashRateLog = params->hashRateLog;
|
||||
|
||||
state->rolling = ~(U32)0;
|
||||
|
||||
/* The choice of the splitting criterion is subject to two conditions:
|
||||
* 1. it has to trigger on average every 2^(hashRateLog) bytes;
|
||||
* 2. ideally, it has to depend on a window of minMatchLength bytes.
|
||||
*
|
||||
* In the gear hash algorithm, bit n depends on the last n bytes;
|
||||
* so in order to obtain a good quality splitting criterion it is
|
||||
* preferable to use bits with high weight.
|
||||
*
|
||||
* To match condition 1 we use a mask with hashRateLog bits set
|
||||
* and, because of the previous remark, we make sure these bits
|
||||
* have the highest possible weight while still respecting
|
||||
* condition 2.
|
||||
*/
|
||||
if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
|
||||
state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
|
||||
} else {
|
||||
/* In this degenerate case we simply honor the hash rate. */
|
||||
state->stopMask = ((U64)1 << hashRateLog) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_gear_reset()
|
||||
* Feeds [data, data + minMatchLength) into the hash without registering any
|
||||
* splits. This effectively resets the hash state. This is used when skipping
|
||||
* over data, either at the beginning of a block, or skipping sections.
|
||||
*/
|
||||
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
|
||||
BYTE const* data, size_t minMatchLength)
|
||||
{
|
||||
U64 hash = state->rolling;
|
||||
size_t n = 0;
|
||||
|
||||
#define GEAR_ITER_ONCE() do { \
|
||||
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
|
||||
n += 1; \
|
||||
} while (0)
|
||||
while (n + 3 < minMatchLength) {
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
while (n < minMatchLength) {
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
#undef GEAR_ITER_ONCE
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_gear_feed():
|
||||
*
|
||||
* Registers in the splits array all the split points found in the first
|
||||
* size bytes following the data pointer. This function terminates when
|
||||
* either all the data has been processed or LDM_BATCH_SIZE splits are
|
||||
* present in the splits array.
|
||||
*
|
||||
* Precondition: The splits array must not be full.
|
||||
* Returns: The number of bytes processed. */
|
||||
static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
|
||||
BYTE const* data, size_t size,
|
||||
size_t* splits, unsigned* numSplits)
|
||||
{
|
||||
size_t n;
|
||||
U64 hash, mask;
|
||||
|
||||
hash = state->rolling;
|
||||
mask = state->stopMask;
|
||||
n = 0;
|
||||
|
||||
#define GEAR_ITER_ONCE() do { \
|
||||
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
|
||||
n += 1; \
|
||||
if (UNLIKELY((hash & mask) == 0)) { \
|
||||
splits[*numSplits] = n; \
|
||||
*numSplits += 1; \
|
||||
if (*numSplits == LDM_BATCH_SIZE) \
|
||||
goto done; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
while (n + 3 < size) {
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
while (n < size) {
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
|
||||
#undef GEAR_ITER_ONCE
|
||||
|
||||
done:
|
||||
state->rolling = hash;
|
||||
return n;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
||||
ZSTD_compressionParameters const* cParams)
|
||||
|
|
@ -27,13 +140,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
|||
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
|
||||
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
|
||||
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
|
||||
if (cParams->strategy >= ZSTD_btopt) {
|
||||
/* Get out of the way of the optimal parser */
|
||||
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
|
||||
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
|
||||
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
|
||||
params->minMatchLength = minMatch;
|
||||
}
|
||||
if (params->hashLog == 0) {
|
||||
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
|
||||
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
|
||||
|
|
@ -53,47 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
|
|||
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
|
||||
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
|
||||
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
|
||||
return params.enableLdm ? totalSize : 0;
|
||||
return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
|
||||
{
|
||||
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_getSmallHash() :
|
||||
* numBits should be <= 32
|
||||
* If numBits==0, returns 0.
|
||||
* @return : the most significant numBits of value. */
|
||||
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
|
||||
{
|
||||
assert(numBits <= 32);
|
||||
return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_getChecksum() :
|
||||
* numBitsToDiscard should be <= 32
|
||||
* @return : the next most significant 32 bits after numBitsToDiscard */
|
||||
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
|
||||
{
|
||||
assert(numBitsToDiscard <= 32);
|
||||
return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_getTag() ;
|
||||
* Given the hash, returns the most significant numTagBits bits
|
||||
* after (32 + hbits) bits.
|
||||
*
|
||||
* If there are not enough bits remaining, return the last
|
||||
* numTagBits bits. */
|
||||
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
|
||||
{
|
||||
assert(numTagBits < 32 && hbits <= 32);
|
||||
if (32 - hbits < numTagBits) {
|
||||
return hash & (((U32)1 << numTagBits) - 1);
|
||||
} else {
|
||||
return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
|
||||
}
|
||||
return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_getBucket() :
|
||||
|
|
@ -110,38 +181,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
|
|||
size_t const hash, const ldmEntry_t entry,
|
||||
ldmParams_t const ldmParams)
|
||||
{
|
||||
BYTE* const bucketOffsets = ldmState->bucketOffsets;
|
||||
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
|
||||
bucketOffsets[hash]++;
|
||||
bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
|
||||
}
|
||||
BYTE* const pOffset = ldmState->bucketOffsets + hash;
|
||||
unsigned const offset = *pOffset;
|
||||
|
||||
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
|
||||
*pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
|
||||
|
||||
/** ZSTD_ldm_makeEntryAndInsertByTag() :
|
||||
*
|
||||
* Gets the small hash, checksum, and tag from the rollingHash.
|
||||
*
|
||||
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
|
||||
* creates an ldmEntry from the offset, and inserts it into the hash table.
|
||||
*
|
||||
* hBits is the length of the small hash, which is the most significant hBits
|
||||
* of rollingHash. The checksum is the next 32 most significant bits, followed
|
||||
* by ldmParams.hashRateLog bits that make up the tag. */
|
||||
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
||||
U64 const rollingHash,
|
||||
U32 const hBits,
|
||||
U32 const offset,
|
||||
ldmParams_t const ldmParams)
|
||||
{
|
||||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
|
||||
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
|
||||
if (tag == tagMask) {
|
||||
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
|
||||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
||||
ldmEntry_t entry;
|
||||
entry.offset = offset;
|
||||
entry.checksum = checksum;
|
||||
ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_countBackwardsMatch() :
|
||||
|
|
@ -150,10 +195,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
|||
* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
|
||||
static size_t ZSTD_ldm_countBackwardsMatch(
|
||||
const BYTE* pIn, const BYTE* pAnchor,
|
||||
const BYTE* pMatch, const BYTE* pBase)
|
||||
const BYTE* pMatch, const BYTE* pMatchBase)
|
||||
{
|
||||
size_t matchLength = 0;
|
||||
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
|
||||
while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
|
||||
pIn--;
|
||||
pMatch--;
|
||||
matchLength++;
|
||||
|
|
@ -161,6 +206,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
|
|||
return matchLength;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_countBackwardsMatch_2segments() :
|
||||
* Returns the number of bytes that match backwards from pMatch,
|
||||
* even with the backwards match spanning 2 different segments.
|
||||
*
|
||||
* On reaching `pMatchBase`, start counting from mEnd */
|
||||
static size_t ZSTD_ldm_countBackwardsMatch_2segments(
|
||||
const BYTE* pIn, const BYTE* pAnchor,
|
||||
const BYTE* pMatch, const BYTE* pMatchBase,
|
||||
const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
|
||||
{
|
||||
size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
|
||||
if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
|
||||
/* If backwards match is entirely in the extDict or prefix, immediately return */
|
||||
return matchLength;
|
||||
}
|
||||
DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
|
||||
matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
|
||||
DEBUGLOG(7, "final backwards match length = %zu", matchLength);
|
||||
return matchLength;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_fillFastTables() :
|
||||
*
|
||||
* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
|
||||
|
|
@ -176,11 +242,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
|
|||
switch(ms->cParams.strategy)
|
||||
{
|
||||
case ZSTD_fast:
|
||||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
|
||||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
|
||||
break;
|
||||
|
||||
case ZSTD_dfast:
|
||||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
|
||||
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
|
||||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
|
||||
#else
|
||||
assert(0); /* shouldn't be called: cparams should've been adjusted. */
|
||||
#endif
|
||||
break;
|
||||
|
||||
case ZSTD_greedy:
|
||||
|
|
@ -198,43 +268,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_fillLdmHashTable() :
|
||||
*
|
||||
* Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
|
||||
* lastHash is the rolling hash that corresponds to lastHashed.
|
||||
*
|
||||
* Returns the rolling hash corresponding to position iend-1. */
|
||||
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
||||
U64 lastHash, const BYTE* lastHashed,
|
||||
const BYTE* iend, const BYTE* base,
|
||||
U32 hBits, ldmParams_t const ldmParams)
|
||||
{
|
||||
U64 rollingHash = lastHash;
|
||||
const BYTE* cur = lastHashed + 1;
|
||||
|
||||
while (cur < iend) {
|
||||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
|
||||
cur[ldmParams.minMatchLength-1],
|
||||
state->hashPower);
|
||||
ZSTD_ldm_makeEntryAndInsertByTag(state,
|
||||
rollingHash, hBits,
|
||||
(U32)(cur - base), ldmParams);
|
||||
++cur;
|
||||
}
|
||||
return rollingHash;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_fillHashTable(
|
||||
ldmState_t* state, const BYTE* ip,
|
||||
ldmState_t* ldmState, const BYTE* ip,
|
||||
const BYTE* iend, ldmParams_t const* params)
|
||||
{
|
||||
U32 const minMatchLength = params->minMatchLength;
|
||||
U32 const hBits = params->hashLog - params->bucketSizeLog;
|
||||
BYTE const* const base = ldmState->window.base;
|
||||
BYTE const* const istart = ip;
|
||||
ldmRollingHashState_t hashState;
|
||||
size_t* const splits = ldmState->splitIndices;
|
||||
unsigned numSplits;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
|
||||
if ((size_t)(iend - ip) >= params->minMatchLength) {
|
||||
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
|
||||
ZSTD_ldm_fillLdmHashTable(
|
||||
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
|
||||
params->hashLog - params->bucketSizeLog,
|
||||
*params);
|
||||
|
||||
ZSTD_ldm_gear_init(&hashState, params);
|
||||
while (ip < iend) {
|
||||
size_t hashed;
|
||||
unsigned n;
|
||||
|
||||
numSplits = 0;
|
||||
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
|
||||
|
||||
for (n = 0; n < numSplits; n++) {
|
||||
if (ip + splits[n] >= istart + minMatchLength) {
|
||||
BYTE const* const split = ip + splits[n] - minMatchLength;
|
||||
U64 const xxhash = XXH64(split, minMatchLength, 0);
|
||||
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
|
||||
ldmEntry_t entry;
|
||||
|
||||
entry.offset = (U32)(split - base);
|
||||
entry.checksum = (U32)(xxhash >> 32);
|
||||
ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
|
||||
}
|
||||
}
|
||||
|
||||
ip += hashed;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -246,25 +315,24 @@ void ZSTD_ldm_fillHashTable(
|
|||
* (after a long match, only update tables a limited amount). */
|
||||
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
|
||||
{
|
||||
U32 const current = (U32)(anchor - ms->window.base);
|
||||
if (current > ms->nextToUpdate + 1024) {
|
||||
U32 const curr = (U32)(anchor - ms->window.base);
|
||||
if (curr > ms->nextToUpdate + 1024) {
|
||||
ms->nextToUpdate =
|
||||
current - MIN(512, current - ms->nextToUpdate - 1024);
|
||||
curr - MIN(512, curr - ms->nextToUpdate - 1024);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t ZSTD_ldm_generateSequences_internal(
|
||||
static
|
||||
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
||||
size_t ZSTD_ldm_generateSequences_internal(
|
||||
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
|
||||
ldmParams_t const* params, void const* src, size_t srcSize)
|
||||
{
|
||||
/* LDM parameters */
|
||||
int const extDict = ZSTD_window_hasExtDict(ldmState->window);
|
||||
U32 const minMatchLength = params->minMatchLength;
|
||||
U64 const hashPower = ldmState->hashPower;
|
||||
U32 const entsPerBucket = 1U << params->bucketSizeLog;
|
||||
U32 const hBits = params->hashLog - params->bucketSizeLog;
|
||||
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
|
||||
U32 const hashRateLog = params->hashRateLog;
|
||||
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
|
||||
/* Prefix and extDict parameters */
|
||||
U32 const dictLimit = ldmState->window.dictLimit;
|
||||
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
|
||||
|
|
@ -276,45 +344,69 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
/* Input bounds */
|
||||
BYTE const* const istart = (BYTE const*)src;
|
||||
BYTE const* const iend = istart + srcSize;
|
||||
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
|
||||
BYTE const* const ilimit = iend - HASH_READ_SIZE;
|
||||
/* Input positions */
|
||||
BYTE const* anchor = istart;
|
||||
BYTE const* ip = istart;
|
||||
/* Rolling hash */
|
||||
BYTE const* lastHashed = NULL;
|
||||
U64 rollingHash = 0;
|
||||
/* Rolling hash state */
|
||||
ldmRollingHashState_t hashState;
|
||||
/* Arrays for staged-processing */
|
||||
size_t* const splits = ldmState->splitIndices;
|
||||
ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
|
||||
unsigned numSplits;
|
||||
|
||||
while (ip <= ilimit) {
|
||||
size_t mLength;
|
||||
U32 const current = (U32)(ip - base);
|
||||
size_t forwardMatchLength = 0, backwardMatchLength = 0;
|
||||
ldmEntry_t* bestEntry = NULL;
|
||||
if (ip != istart) {
|
||||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
|
||||
lastHashed[minMatchLength],
|
||||
hashPower);
|
||||
} else {
|
||||
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
|
||||
}
|
||||
lastHashed = ip;
|
||||
if (srcSize < minMatchLength)
|
||||
return iend - anchor;
|
||||
|
||||
/* Do not insert and do not look for a match */
|
||||
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
|
||||
ip++;
|
||||
continue;
|
||||
/* Initialize the rolling hash state with the first minMatchLength bytes */
|
||||
ZSTD_ldm_gear_init(&hashState, params);
|
||||
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
|
||||
ip += minMatchLength;
|
||||
|
||||
while (ip < ilimit) {
|
||||
size_t hashed;
|
||||
unsigned n;
|
||||
|
||||
numSplits = 0;
|
||||
hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
|
||||
splits, &numSplits);
|
||||
|
||||
for (n = 0; n < numSplits; n++) {
|
||||
BYTE const* const split = ip + splits[n] - minMatchLength;
|
||||
U64 const xxhash = XXH64(split, minMatchLength, 0);
|
||||
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
|
||||
|
||||
candidates[n].split = split;
|
||||
candidates[n].hash = hash;
|
||||
candidates[n].checksum = (U32)(xxhash >> 32);
|
||||
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
|
||||
PREFETCH_L1(candidates[n].bucket);
|
||||
}
|
||||
|
||||
/* Get the best entry and compute the match lengths */
|
||||
{
|
||||
ldmEntry_t* const bucket =
|
||||
ZSTD_ldm_getBucket(ldmState,
|
||||
ZSTD_ldm_getSmallHash(rollingHash, hBits),
|
||||
*params);
|
||||
ldmEntry_t* cur;
|
||||
size_t bestMatchLength = 0;
|
||||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
||||
for (n = 0; n < numSplits; n++) {
|
||||
size_t forwardMatchLength = 0, backwardMatchLength = 0,
|
||||
bestMatchLength = 0, mLength;
|
||||
U32 offset;
|
||||
BYTE const* const split = candidates[n].split;
|
||||
U32 const checksum = candidates[n].checksum;
|
||||
U32 const hash = candidates[n].hash;
|
||||
ldmEntry_t* const bucket = candidates[n].bucket;
|
||||
ldmEntry_t const* cur;
|
||||
ldmEntry_t const* bestEntry = NULL;
|
||||
ldmEntry_t newEntry;
|
||||
|
||||
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
|
||||
newEntry.offset = (U32)(split - base);
|
||||
newEntry.checksum = checksum;
|
||||
|
||||
/* If a split point would generate a sequence overlapping with
|
||||
* the previous one, we merely register it in the hash table and
|
||||
* move on */
|
||||
if (split < anchor) {
|
||||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
|
||||
size_t curForwardMatchLength, curBackwardMatchLength,
|
||||
curTotalMatchLength;
|
||||
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
|
||||
|
|
@ -328,30 +420,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
cur->offset < dictLimit ? dictEnd : iend;
|
||||
BYTE const* const lowMatchPtr =
|
||||
cur->offset < dictLimit ? dictStart : lowPrefixPtr;
|
||||
|
||||
curForwardMatchLength = ZSTD_count_2segments(
|
||||
ip, pMatch, iend,
|
||||
matchEnd, lowPrefixPtr);
|
||||
curForwardMatchLength =
|
||||
ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
|
||||
if (curForwardMatchLength < minMatchLength) {
|
||||
continue;
|
||||
}
|
||||
curBackwardMatchLength =
|
||||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
|
||||
lowMatchPtr);
|
||||
curTotalMatchLength = curForwardMatchLength +
|
||||
curBackwardMatchLength;
|
||||
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
|
||||
split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
|
||||
} else { /* !extDict */
|
||||
BYTE const* const pMatch = base + cur->offset;
|
||||
curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
|
||||
curForwardMatchLength = ZSTD_count(split, pMatch, iend);
|
||||
if (curForwardMatchLength < minMatchLength) {
|
||||
continue;
|
||||
}
|
||||
curBackwardMatchLength =
|
||||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
|
||||
lowPrefixPtr);
|
||||
curTotalMatchLength = curForwardMatchLength +
|
||||
curBackwardMatchLength;
|
||||
ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
|
||||
}
|
||||
curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
|
||||
|
||||
if (curTotalMatchLength > bestMatchLength) {
|
||||
bestMatchLength = curTotalMatchLength;
|
||||
|
|
@ -360,57 +445,54 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
bestEntry = cur;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* No match found -- continue searching */
|
||||
if (bestEntry == NULL) {
|
||||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
|
||||
hBits, current,
|
||||
*params);
|
||||
ip++;
|
||||
continue;
|
||||
}
|
||||
/* No match found -- insert an entry into the hash table
|
||||
* and process the next candidate match */
|
||||
if (bestEntry == NULL) {
|
||||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Match found */
|
||||
mLength = forwardMatchLength + backwardMatchLength;
|
||||
ip -= backwardMatchLength;
|
||||
/* Match found */
|
||||
offset = (U32)(split - base) - bestEntry->offset;
|
||||
mLength = forwardMatchLength + backwardMatchLength;
|
||||
{
|
||||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
|
||||
|
||||
{
|
||||
/* Store the sequence:
|
||||
* ip = current - backwardMatchLength
|
||||
* The match is at (bestEntry->offset - backwardMatchLength)
|
||||
/* Out of sequence storage */
|
||||
if (rawSeqStore->size == rawSeqStore->capacity)
|
||||
return ERROR(dstSize_tooSmall);
|
||||
seq->litLength = (U32)(split - backwardMatchLength - anchor);
|
||||
seq->matchLength = (U32)mLength;
|
||||
seq->offset = offset;
|
||||
rawSeqStore->size++;
|
||||
}
|
||||
|
||||
/* Insert the current entry into the hash table --- it must be
|
||||
* done after the previous block to avoid clobbering bestEntry */
|
||||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
|
||||
|
||||
anchor = split + forwardMatchLength;
|
||||
|
||||
/* If we find a match that ends after the data that we've hashed
|
||||
* then we have a repeating, overlapping, pattern. E.g. all zeros.
|
||||
* If one repetition of the pattern matches our `stopMask` then all
|
||||
* repetitions will. We don't need to insert them all into out table,
|
||||
* only the first one. So skip over overlapping matches.
|
||||
* This is a major speed boost (20x) for compressing a single byte
|
||||
* repeated, when that byte ends up in the table.
|
||||
*/
|
||||
U32 const matchIndex = bestEntry->offset;
|
||||
U32 const offset = current - matchIndex;
|
||||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
|
||||
|
||||
/* Out of sequence storage */
|
||||
if (rawSeqStore->size == rawSeqStore->capacity)
|
||||
return ERROR(dstSize_tooSmall);
|
||||
seq->litLength = (U32)(ip - anchor);
|
||||
seq->matchLength = (U32)mLength;
|
||||
seq->offset = offset;
|
||||
rawSeqStore->size++;
|
||||
if (anchor > ip + hashed) {
|
||||
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
|
||||
/* Continue the outer loop at anchor (ip + hashed == anchor). */
|
||||
ip = anchor - hashed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert the current entry into the hash table */
|
||||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
|
||||
(U32)(lastHashed - base),
|
||||
*params);
|
||||
|
||||
assert(ip + backwardMatchLength == lastHashed);
|
||||
|
||||
/* Fill the hash table from lastHashed+1 to ip+mLength*/
|
||||
/* Heuristic: don't need to fill the entire table at end of block */
|
||||
if (ip + mLength <= ilimit) {
|
||||
rollingHash = ZSTD_ldm_fillLdmHashTable(
|
||||
ldmState, rollingHash, lastHashed,
|
||||
ip + mLength, base, hBits, *params);
|
||||
lastHashed = ip + mLength - 1;
|
||||
}
|
||||
ip += mLength;
|
||||
anchor = ip;
|
||||
ip += hashed;
|
||||
}
|
||||
|
||||
return iend - anchor;
|
||||
}
|
||||
|
||||
|
|
@ -459,7 +541,7 @@ size_t ZSTD_ldm_generateSequences(
|
|||
|
||||
assert(chunkStart < iend);
|
||||
/* 1. Perform overflow correction if necessary. */
|
||||
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
||||
if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
|
||||
U32 const ldmHSize = 1U << params->hashLog;
|
||||
U32 const correction = ZSTD_window_correctOverflow(
|
||||
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
||||
|
|
@ -473,7 +555,7 @@ size_t ZSTD_ldm_generateSequences(
|
|||
* the window through early invalidation.
|
||||
* TODO: * Test the chunk size.
|
||||
* * Try invalidation after the sequence generation and test the
|
||||
* the offset against maxDist directly.
|
||||
* offset against maxDist directly.
|
||||
*
|
||||
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
||||
* that any offset used is valid at the END of the sequence, since it may
|
||||
|
|
@ -503,7 +585,9 @@ size_t ZSTD_ldm_generateSequences(
|
|||
return 0;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
|
||||
void
|
||||
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
|
||||
{
|
||||
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
|
||||
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
|
||||
if (srcSize <= seq->litLength) {
|
||||
|
|
@ -562,14 +646,32 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
|||
return sequence;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
|
||||
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
||||
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
||||
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
||||
if (currPos >= currSeq.litLength + currSeq.matchLength) {
|
||||
currPos -= currSeq.litLength + currSeq.matchLength;
|
||||
rawSeqStore->pos++;
|
||||
} else {
|
||||
rawSeqStore->posInSequence = currPos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
|
||||
rawSeqStore->posInSequence = 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
ZSTD_paramSwitch_e useRowMatchFinder,
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
unsigned const minMatch = cParams->minMatch;
|
||||
ZSTD_blockCompressor const blockCompressor =
|
||||
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
|
||||
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
|
||||
/* Input bounds */
|
||||
BYTE const* const istart = (BYTE const*)src;
|
||||
BYTE const* const iend = istart + srcSize;
|
||||
|
|
@ -577,14 +679,22 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|||
BYTE const* ip = istart;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
|
||||
/* If using opt parser, use LDMs only as candidates rather than always accepting them */
|
||||
if (cParams->strategy >= ZSTD_btopt) {
|
||||
size_t lastLLSize;
|
||||
ms->ldmSeqStore = rawSeqStore;
|
||||
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
|
||||
ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
|
||||
return lastLLSize;
|
||||
}
|
||||
|
||||
assert(rawSeqStore->pos <= rawSeqStore->size);
|
||||
assert(rawSeqStore->size <= rawSeqStore->capacity);
|
||||
/* Loop through each sequence and apply the block compressor to the lits */
|
||||
/* Loop through each sequence and apply the block compressor to the literals */
|
||||
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
|
||||
/* maybeSplitSequence updates rawSeqStore->pos */
|
||||
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
|
||||
(U32)(iend - ip), minMatch);
|
||||
int i;
|
||||
/* End signal */
|
||||
if (sequence.offset == 0)
|
||||
break;
|
||||
|
|
@ -597,6 +707,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|||
/* Run the block compressor */
|
||||
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
|
||||
{
|
||||
int i;
|
||||
size_t const newLitLength =
|
||||
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
|
||||
ip += sequence.litLength;
|
||||
|
|
@ -606,8 +717,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|||
rep[0] = sequence.offset;
|
||||
/* Store the sequence */
|
||||
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
|
||||
sequence.offset + ZSTD_REP_MOVE,
|
||||
sequence.matchLength - MINMATCH);
|
||||
OFFSET_TO_OFFBASE(sequence.offset),
|
||||
sequence.matchLength);
|
||||
ip += sequence.matchLength;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences(
|
|||
*/
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
ZSTD_paramSwitch_e useRowMatchFinder,
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
/**
|
||||
|
|
@ -73,11 +74,17 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|||
*
|
||||
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
|
||||
* Avoids emitting matches less than `minMatch` bytes.
|
||||
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
|
||||
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
|
||||
*/
|
||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
|
||||
U32 const minMatch);
|
||||
|
||||
/* ZSTD_ldm_skipRawSeqStoreBytes():
|
||||
* Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
|
||||
* Not to be used in conjunction with ZSTD_ldm_skipSequences().
|
||||
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
|
||||
*/
|
||||
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
|
||||
|
||||
/** ZSTD_ldm_getTableSize() :
|
||||
* Estimate the space needed for long distance matching tables or 0 if LDM is
|
||||
|
|
|
|||
106
uppsrc/plugin/zstd/lib/zstd_ldm_geartab.h
Normal file
106
uppsrc/plugin/zstd/lib/zstd_ldm_geartab.h
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_LDM_GEARTAB_H
|
||||
#define ZSTD_LDM_GEARTAB_H
|
||||
|
||||
#include "compiler.h" /* UNUSED_ATTR */
|
||||
#include "mem.h" /* U64 */
|
||||
|
||||
static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
|
||||
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
|
||||
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
|
||||
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
|
||||
0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
|
||||
0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
|
||||
0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140,
|
||||
0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e,
|
||||
0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
|
||||
0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
|
||||
0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
|
||||
0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
|
||||
0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
|
||||
0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
|
||||
0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
|
||||
0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
|
||||
0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
|
||||
0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
|
||||
0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
|
||||
0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
|
||||
0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
|
||||
0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
|
||||
0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
|
||||
0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
|
||||
0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
|
||||
0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
|
||||
0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
|
||||
0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
|
||||
0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b,
|
||||
0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
|
||||
0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
|
||||
0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
|
||||
0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
|
||||
0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
|
||||
0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
|
||||
0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
|
||||
0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
|
||||
0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec,
|
||||
0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
|
||||
0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab,
|
||||
0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
|
||||
0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
|
||||
0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
|
||||
0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
|
||||
0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
|
||||
0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
|
||||
0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
|
||||
0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
|
||||
0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
|
||||
0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
|
||||
0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
|
||||
0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c,
|
||||
0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
|
||||
0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
|
||||
0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
|
||||
0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a,
|
||||
0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
|
||||
0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
|
||||
0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
|
||||
0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
|
||||
0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
|
||||
0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
|
||||
0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
|
||||
0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
|
||||
0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
|
||||
0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756,
|
||||
0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
|
||||
0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
|
||||
0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
|
||||
0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
|
||||
0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
|
||||
0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
|
||||
0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
|
||||
0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
|
||||
0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
|
||||
0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
|
||||
0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
|
||||
0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
|
||||
0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
|
||||
0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
|
||||
0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
|
||||
0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
|
||||
0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
|
||||
0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
|
||||
0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
|
||||
0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
|
||||
0x2b4da14f2613d8f4
|
||||
};
|
||||
|
||||
#endif /* ZSTD_LDM_GEARTAB_H */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -17,28 +17,38 @@ extern "C" {
|
|||
|
||||
#include "zstd_compress_internal.h"
|
||||
|
||||
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|
||||
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|
||||
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
|
||||
/* used in ZSTD_loadDictionaryContent() */
|
||||
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
||||
size_t ZSTD_compressBlock_btopt(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btultra(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btultra2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
||||
size_t ZSTD_compressBlock_btopt_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_btopt_extDict(
|
||||
#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
|
||||
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_BTOPT NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
||||
size_t ZSTD_compressBlock_btultra(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btultra_extDict(
|
||||
|
|
@ -48,6 +58,20 @@ size_t ZSTD_compressBlock_btultra_extDict(
|
|||
/* note : no btultra2 variant for extDict nor dictMatchState,
|
||||
* because btultra2 is not meant to work with dictionaries
|
||||
* and is only specific for the first block (no prefix) */
|
||||
size_t ZSTD_compressBlock_btultra2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
|
||||
#else
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
|
||||
#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
|
||||
#endif
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
|
|
|||
163
uppsrc/plugin/zstd/lib/zstd_trace.h
Normal file
163
uppsrc/plugin/zstd/lib/zstd_trace.h
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_TRACE_H
|
||||
#define ZSTD_TRACE_H
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* weak symbol support
|
||||
* For now, enable conservatively:
|
||||
* - Only GNUC
|
||||
* - Only ELF
|
||||
* - Only x86-64, i386 and aarch64
|
||||
* Also, explicitly disable on platforms known not to work so they aren't
|
||||
* forgotten in the future.
|
||||
*/
|
||||
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
|
||||
defined(__GNUC__) && defined(__ELF__) && \
|
||||
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \
|
||||
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
|
||||
!defined(__CYGWIN__) && !defined(_AIX)
|
||||
# define ZSTD_HAVE_WEAK_SYMBOLS 1
|
||||
#else
|
||||
# define ZSTD_HAVE_WEAK_SYMBOLS 0
|
||||
#endif
|
||||
#if ZSTD_HAVE_WEAK_SYMBOLS
|
||||
# define ZSTD_WEAK_ATTR __attribute__((__weak__))
|
||||
#else
|
||||
# define ZSTD_WEAK_ATTR
|
||||
#endif
|
||||
|
||||
/* Only enable tracing when weak symbols are available. */
|
||||
#ifndef ZSTD_TRACE
|
||||
# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
|
||||
#endif
|
||||
|
||||
#if ZSTD_TRACE
|
||||
|
||||
struct ZSTD_CCtx_s;
|
||||
struct ZSTD_DCtx_s;
|
||||
struct ZSTD_CCtx_params_s;
|
||||
|
||||
typedef struct {
|
||||
/**
|
||||
* ZSTD_VERSION_NUMBER
|
||||
*
|
||||
* This is guaranteed to be the first member of ZSTD_trace.
|
||||
* Otherwise, this struct is not stable between versions. If
|
||||
* the version number does not match your expectation, you
|
||||
* should not interpret the rest of the struct.
|
||||
*/
|
||||
unsigned version;
|
||||
/**
|
||||
* Non-zero if streaming (de)compression is used.
|
||||
*/
|
||||
unsigned streaming;
|
||||
/**
|
||||
* The dictionary ID.
|
||||
*/
|
||||
unsigned dictionaryID;
|
||||
/**
|
||||
* Is the dictionary cold?
|
||||
* Only set on decompression.
|
||||
*/
|
||||
unsigned dictionaryIsCold;
|
||||
/**
|
||||
* The dictionary size or zero if no dictionary.
|
||||
*/
|
||||
size_t dictionarySize;
|
||||
/**
|
||||
* The uncompressed size of the data.
|
||||
*/
|
||||
size_t uncompressedSize;
|
||||
/**
|
||||
* The compressed size of the data.
|
||||
*/
|
||||
size_t compressedSize;
|
||||
/**
|
||||
* The fully resolved CCtx parameters (NULL on decompression).
|
||||
*/
|
||||
struct ZSTD_CCtx_params_s const* params;
|
||||
/**
|
||||
* The ZSTD_CCtx pointer (NULL on decompression).
|
||||
*/
|
||||
struct ZSTD_CCtx_s const* cctx;
|
||||
/**
|
||||
* The ZSTD_DCtx pointer (NULL on compression).
|
||||
*/
|
||||
struct ZSTD_DCtx_s const* dctx;
|
||||
} ZSTD_Trace;
|
||||
|
||||
/**
|
||||
* A tracing context. It must be 0 when tracing is disabled.
|
||||
* Otherwise, any non-zero value returned by a tracing begin()
|
||||
* function is presented to any subsequent calls to end().
|
||||
*
|
||||
* Any non-zero value is treated as tracing is enabled and not
|
||||
* interpreted by the library.
|
||||
*
|
||||
* Two possible uses are:
|
||||
* * A timestamp for when the begin() function was called.
|
||||
* * A unique key identifying the (de)compression, like the
|
||||
* address of the [dc]ctx pointer if you need to track
|
||||
* more information than just a timestamp.
|
||||
*/
|
||||
typedef unsigned long long ZSTD_TraceCtx;
|
||||
|
||||
/**
|
||||
* Trace the beginning of a compression call.
|
||||
* @param cctx The dctx pointer for the compression.
|
||||
* It can be used as a key to map begin() to end().
|
||||
* @returns Non-zero if tracing is enabled. The return value is
|
||||
* passed to ZSTD_trace_compress_end().
|
||||
*/
|
||||
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
|
||||
struct ZSTD_CCtx_s const* cctx);
|
||||
|
||||
/**
|
||||
* Trace the end of a compression call.
|
||||
* @param ctx The return value of ZSTD_trace_compress_begin().
|
||||
* @param trace The zstd tracing info.
|
||||
*/
|
||||
ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
|
||||
ZSTD_TraceCtx ctx,
|
||||
ZSTD_Trace const* trace);
|
||||
|
||||
/**
|
||||
* Trace the beginning of a decompression call.
|
||||
* @param dctx The dctx pointer for the decompression.
|
||||
* It can be used as a key to map begin() to end().
|
||||
* @returns Non-zero if tracing is enabled. The return value is
|
||||
* passed to ZSTD_trace_compress_end().
|
||||
*/
|
||||
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
|
||||
struct ZSTD_DCtx_s const* dctx);
|
||||
|
||||
/**
|
||||
* Trace the end of a decompression call.
|
||||
* @param ctx The return value of ZSTD_trace_decompress_begin().
|
||||
* @param trace The zstd tracing info.
|
||||
*/
|
||||
ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
|
||||
ZSTD_TraceCtx ctx,
|
||||
ZSTD_Trace const* trace);
|
||||
|
||||
#endif /* ZSTD_TRACE */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ZSTD_TRACE_H */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
|
|
@ -19,113 +19,60 @@
|
|||
/* Note : This is an internal API.
|
||||
* These APIs used to be exposed with ZSTDLIB_API,
|
||||
* because it used to be the only way to invoke MT compression.
|
||||
* Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
|
||||
* instead.
|
||||
*
|
||||
* If you depend on these APIs and can't switch, then define
|
||||
* ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
|
||||
* However, we may completely remove these functions in a future
|
||||
* release, so please switch soon.
|
||||
* Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead.
|
||||
*
|
||||
* This API requires ZSTD_MULTITHREAD to be defined during compilation,
|
||||
* otherwise ZSTDMT_createCCtx*() will fail.
|
||||
*/
|
||||
|
||||
#ifdef ZSTD_LEGACY_MULTITHREADED_API
|
||||
# define ZSTDMT_API ZSTDLIB_API
|
||||
#else
|
||||
# define ZSTDMT_API
|
||||
#endif
|
||||
|
||||
/* === Dependencies === */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
|
||||
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
||||
|
||||
|
||||
/* === Constants === */
|
||||
#ifndef ZSTDMT_NBWORKERS_MAX
|
||||
# define ZSTDMT_NBWORKERS_MAX 200
|
||||
#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
|
||||
# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
|
||||
#endif
|
||||
#ifndef ZSTDMT_JOBSIZE_MIN
|
||||
# define ZSTDMT_JOBSIZE_MIN (1 MB)
|
||||
#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */
|
||||
# define ZSTDMT_JOBSIZE_MIN (512 KB)
|
||||
#endif
|
||||
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
|
||||
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
|
||||
|
||||
|
||||
/* ========================================================
|
||||
* === Private interface, for use by ZSTD_compress.c ===
|
||||
* === Not exposed in libzstd. Never invoke directly ===
|
||||
* ======================================================== */
|
||||
|
||||
/* === Memory management === */
|
||||
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
|
||||
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
ZSTD_customMem cMem);
|
||||
ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
|
||||
/* === Simple one-pass compression function === */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
int compressionLevel);
|
||||
|
||||
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
||||
ZSTD_customMem cMem,
|
||||
ZSTD_threadPool *pool);
|
||||
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/* === Streaming functions === */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
|
||||
|
||||
/* === Advanced functions and parameters === */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog);
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
|
||||
ZSTD_parameters params,
|
||||
unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
|
||||
|
||||
ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_frameParameters fparams,
|
||||
unsigned long long pledgedSrcSize); /* note : zero means empty */
|
||||
|
||||
/* ZSTDMT_parameter :
|
||||
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
||||
typedef enum {
|
||||
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
|
||||
} ZSTDMT_parameter;
|
||||
|
||||
/* ZSTDMT_setMTCtxParameter() :
|
||||
* allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
|
||||
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
|
||||
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/* ZSTDMT_getMTCtxParameter() :
|
||||
* Query the ZSTDMT_CCtx for a parameter value.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/*! ZSTDMT_initCStream_internal() :
|
||||
* Private use only. Init streaming operation.
|
||||
* expects params to be valid.
|
||||
* must receive dict, or cdict, or none, but not both.
|
||||
* mtctx can be freshly constructed or reused from a prior compression.
|
||||
* If mtctx is reused, memory allocations from the prior compression may not be freed,
|
||||
* even if they are not needed for the current compression.
|
||||
* @return : 0, or an error code */
|
||||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
||||
|
||||
/*! ZSTDMT_compressStream_generic() :
|
||||
* Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
|
||||
|
|
@ -134,16 +81,10 @@ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
|
|||
* 0 if fully flushed
|
||||
* or an error code
|
||||
* note : needs to be init using any ZSTD_initCStream*() variant */
|
||||
ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
||||
|
||||
/* ========================================================
|
||||
* === Private interface, for use by ZSTD_compress.c ===
|
||||
* === Not exposed in libzstd. Never invoke directly ===
|
||||
* ======================================================== */
|
||||
size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
ZSTD_outBuffer* output,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
||||
/*! ZSTDMT_toFlushNow()
|
||||
* Tell how many bytes are ready to be flushed immediately.
|
||||
|
|
@ -153,15 +94,6 @@ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|||
* therefore flushing is limited by speed of oldest job. */
|
||||
size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/*! ZSTDMT_CCtxParam_setMTCtxParameter()
|
||||
* like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
|
||||
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/*! ZSTDMT_CCtxParam_setNbWorkers()
|
||||
* Set nbWorkers, and clamp it.
|
||||
* Also reset jobSize and overlapLog */
|
||||
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
|
||||
|
||||
/*! ZSTDMT_updateCParams_whileCompressing() :
|
||||
* Updates only a selected set of compression parameters, to remain compatible with current frame.
|
||||
* New parameters will be applied to next compression job. */
|
||||
|
|
@ -174,17 +106,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
|||
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
|
||||
/*! ZSTDMT_initCStream_internal() :
|
||||
* Private use only. Init streaming operation.
|
||||
* expects params to be valid.
|
||||
* must receive dict, or cdict, or none, but not both.
|
||||
* @return : 0, or an error code */
|
||||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
||||
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ file
|
|||
src.tpp,
|
||||
Copying,
|
||||
lib readonly separator,
|
||||
lib\zstd.h,
|
||||
lib\huf_decompress.c,
|
||||
lib\zstd_ddict.c,
|
||||
lib\zstd_decompress.c,
|
||||
|
|
@ -34,5 +33,7 @@ file
|
|||
lib\zstd_compress_superblock.c,
|
||||
lib\zstd_compress_sequences.c,
|
||||
lib\zstd_compress_literals.c,
|
||||
lib\zstd_compress_superblock.h;
|
||||
lib\zstd_compress_superblock.h,
|
||||
lib\xxhash.c,
|
||||
lib\zstd.h;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue