13# pragma warning(disable : 4127)
14# pragma warning(disable : 4293)
18#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64))
// --- LZ4 block-format tuning constants ---
// NOTE(review): names/values mirror the reference LZ4 implementation — confirm against upstream.
26static constexpr int MEMORY_USAGE = 14;
// Hash table is addressed with HASHLOG bits -> 2^HASHLOG entries (see HASH_SIZE_U32).
27static constexpr int HASHLOG = MEMORY_USAGE - 2;
28static constexpr int HASH_SIZE_U32 = 1 << HASHLOG;
// Scales the match-search skip trigger (searchMatchNb starts at ACCELERATION << SKIP_TRIGGER).
29static constexpr int ACCELERATION = 1;
// Minimum encodable match length, in bytes.
30static constexpr int MINMATCH = 4;
// wildCopy8 copies in strides and may overrun; callers keep this much slack.
31static constexpr int WILDCOPYLENGTH = 8;
// The final LASTLITERALS input bytes are always emitted as literals (matchlimit below).
32static constexpr int LASTLITERALS = 5;
33static constexpr int MFLIMIT = 12;
// 2*8 - 4 = 12: output slack needed so an overlapping match copy cannot overrun.
34static constexpr int MATCH_SAFEGUARD_DISTANCE = 2 * WILDCOPYLENGTH - MINMATCH;
// Decoder fast loop requires at least this much remaining output space.
35static constexpr int FASTLOOP_SAFE_DISTANCE = 64;
// Inputs shorter than this are encoded as a single literal run (no match search).
36static constexpr int MIN_LENGTH = MFLIMIT + 1;
// Match offsets are limited to 16 bits.
37static constexpr int DISTANCE_MAX = 65535;
// Token byte layout: high RUN_BITS = literal-run length, low ML_BITS = match length
// (see `*token = uint8_t(litLength << ML_BITS)` / `*token += uint8_t(matchCode)` in compress_impl).
38static constexpr int ML_BITS = 4;
39static constexpr int ML_MASK = (1 << ML_BITS) - 1;
40static constexpr int RUN_BITS = 8 - ML_BITS;
41static constexpr int RUN_MASK = (1 << RUN_BITS) - 1;
// Inputs below this threshold use the compact 16-bit hash-table variant (compress() dispatch).
42static constexpr int LIMIT_64K = 0x10000 + (MFLIMIT - 1);
// Every (1 << SKIP_TRIGGER) unproductive probes, the forward search step grows by one.
43static constexpr uint32_t SKIP_TRIGGER = 6;
// Width of one register-sized load; 4 or 8 bytes depending on reg_t (arch-dependent).
46static constexpr int STEPSIZE =
sizeof(
reg_t);
// Load one register-width value (reg_t) from 'p' via memcpy — the portable,
// UB-free way to perform a possibly-unaligned read.
// NOTE(review): the declaration of 'val' and the return statement are elided in
// this fragment; the visible memcpy establishes the unaligned-load idiom.
49[[nodiscard]]
static reg_t read_ARCH(
const uint8_t* p)
52 memcpy(&val, p,
sizeof(val));
// Copy [src..) to [dst..) until dst reaches dstEnd, advancing in fixed strides
// (presumably 8-byte chunks per the name — the copy statement itself is elided
// in this fragment). May write up to WILDCOPYLENGTH-1 bytes past dstEnd, so the
// caller must guarantee slack space after dstEnd.
57static void wildCopy8(uint8_t* dst,
const uint8_t* src, uint8_t* dstEnd)
63 }
while (dst < dstEnd);
// Like wildCopy8 but copies 32 bytes per iteration (two 16-byte memcpys),
// looping until dst reaches dstEnd. May overrun dstEnd by up to 31 bytes;
// callers must leave sufficient slack (decoder guards with 32-byte margins).
69static void wildCopy32(uint8_t* dst,
const uint8_t* src, uint8_t* dstEnd)
72 memcpy(dst + 0, src + 0, 16);
73 memcpy(dst + 16, src + 16, 16);
76 }
while (dst < dstEnd);
// Source-pointer adjustment tables for expanding matches whose offset (< 8) is
// smaller than the copy stride, so byte-wise overlap semantics are preserved:
// inc32table advances srcPtr before the second 4-byte copy, dec64table rewinds
// it before switching to 8-byte copies (see memcpy_using_offset_base / decompress).
79static constexpr std::array<unsigned, 8> inc32table = {0, 1, 2, 1, 0, 4, 4, 4};
80static constexpr std::array<int , 8> dec64table = {0, 0, 0, -1, -4, 1, 2, 3};
// Expand an overlapping match copy with a small offset (< 8): copy the first
// four bytes one at a time (safe for any overlap), then use the inc32/dec64
// tables to realign srcPtr so subsequent 4- and 8-byte block copies reproduce
// the correct repeating pattern, finishing with wildCopy8 for the remainder.
// NOTE(review): the pointer advances between the visible statements are elided
// in this fragment — the visible lines establish the table-driven realignment.
82static void memcpy_using_offset_base(uint8_t* dstPtr,
const uint8_t* srcPtr, uint8_t* dstEnd,
const size_t offset)
// Byte-wise first 4 bytes: correct even when dst and src overlap by 1-3 bytes.
85 dstPtr[0] = srcPtr[0];
86 dstPtr[1] = srcPtr[1];
87 dstPtr[2] = srcPtr[2];
88 dstPtr[3] = srcPtr[3];
89 srcPtr += inc32table[offset];
90 memcpy(dstPtr + 4, srcPtr, 4);
91 srcPtr -= dec64table[offset];
94 memcpy(dstPtr, srcPtr, 8);
// Remaining bytes in 8-byte strides (caller guarantees slack past dstEnd).
99 wildCopy8(dstPtr, srcPtr, dstEnd);
// Fast path for overlapping match copies with offset < 16: for offsets 1, 2 and
// 4 the repeating pattern fits in an 8-byte buffer 'v', which is then stamped
// across the output; other offsets fall back to memcpy_using_offset_base.
// NOTE(review): the switch/case structure selecting between the v-fill variants
// is elided in this fragment — presumably switch (offset) per the reference LZ4.
105static void memcpy_using_offset(uint8_t* dstPtr,
const uint8_t* srcPtr, uint8_t* dstEnd,
size_t offset)
107 std::array<uint8_t, 8> v;
// offset == 1: pattern is a single repeated byte.
113 memset(v.data(), *srcPtr, 8);
// offset == 2: repeat a 2-byte pattern four times.
116 memcpy(&v[0], srcPtr, 2);
117 memcpy(&v[2], srcPtr, 2);
118 memcpy(&v[4], &v[0], 4);
// offset == 4: repeat a 4-byte pattern twice.
121 memcpy(&v[0], srcPtr, 4);
122 memcpy(&v[4], srcPtr, 4);
// Other small offsets: table-driven general path.
125 memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
// Stamp the prepared 8-byte pattern until dstEnd is reached (may overrun; the
// caller guarantees slack space past dstEnd).
129 memcpy(dstPtr, v.data(), 8);
131 while (dstPtr < dstEnd) {
132 memcpy(dstPtr, v.data(), 8);
// Given 'val' = XOR of two register-width loads, return the number of equal
// leading bytes (i.e. position of the first differing byte, in bytes).
// Which countX_zero applies depends on byte order; the endianness test between
// the two returns is elided in this fragment (big-endian -> countl_zero,
// little-endian -> countr_zero — TODO confirm against the elided condition).
137[[nodiscard]]
static inline int NbCommonBytes(
size_t val)
141 return std::countl_zero(val) >> 3;
143 return std::countr_zero(val) >> 3;
// Count how many bytes match between pIn and pMatch, reading forward until the
// first difference or until pInLimit. Compares STEPSIZE bytes at a time via
// register-width XOR, then uses NbCommonBytes on the first non-zero diff to
// locate the exact mismatch byte; a byte-wise tail handles the remainder.
147[[nodiscard]]
ALWAYS_INLINE unsigned count(
const uint8_t* pIn,
const uint8_t* pMatch,
const uint8_t* pInLimit)
149 const uint8_t*
const pStart = pIn;
// First word compared outside the loop (common case: mismatch in word 0).
151 if (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
152 reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
157 return NbCommonBytes(diff);
// Word-at-a-time scan while at least STEPSIZE bytes remain before pInLimit.
160 while (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
161 reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
// Non-zero diff: add the count of leading equal bytes and stop.
167 pIn += NbCommonBytes(diff);
168 return unsigned(pIn - pStart);
// Byte-wise tail for the final bytes before pInLimit.
179 if ((pIn < pInLimit) && (*pMatch == *pIn)) {
182 return unsigned(pIn - pStart);
// Hash-table policy classes used by compress_impl. Three variants are visible
// in this fragment (the struct headers of the second and third specializations
// are elided):
//   1) L64K variant: uint16_t entries, 2^(HASHLOG+1) slots — valid because
//      inputs < 64 KiB make every offset fit in 16 bits.
//   2) 64-bit variant: uint32_t index entries, hashing a 40-bit sequence with a
//      64-bit multiplicative hash.
//   3) Pointer variant: stores const uint8_t* directly, 32-bit Knuth hash.
189template<
bool ARCH64>
struct HashImpl<true, ARCH64> {
190 alignas(uint64_t) std::array<uint16_t, 1 << (HASHLOG + 1)> tab = {};
// Fibonacci/Knuth multiplicative hash of a MINMATCH-byte sequence.
194 return (sequence * 2654435761U) >> ((MINMATCH * 8) - (HASHLOG + 1));
197 tab[h] = uint16_t(idx);
// Offsets fit in 16 bits only because this variant is used for inputs < LIMIT_64K.
200 tab[h] = uint16_t(p - srcBase);
203 putPositionOnHash(p, hashPosition(p), srcBase);
209 return tab[h] + srcBase;
211 [[nodiscard]]
const uint8_t*
getPosition(
const uint8_t* p,
const uint8_t* srcBase)
const {
212 return getPositionOnHash(hashPosition(p), srcBase);
// --- second variant: 32-bit indices, 64-bit multiplicative hash ---
218 alignas(uint64_t) std::array<uint32_t, 1 << HASHLOG> tab = {};
221 uint64_t sequence = read_ARCH(p);
// Prime selected by an elided byte-order test — TODO confirm the condition.
223 ? 11400714785074694791ULL
// 'sequence << 24' keeps 5 input bytes in play before the multiply.
225 return uint32_t(((sequence << 24) * prime) >> (64 - HASHLOG));
231 tab[h] = uint32_t(p - srcBase);
234 putPositionOnHash(p, hashPosition(p), srcBase);
240 return tab[h] + srcBase;
242 [[nodiscard]]
const uint8_t*
getPosition(
const uint8_t* p,
const uint8_t* srcBase)
const {
243 return getPositionOnHash(hashPosition(p), srcBase);
// --- third variant: raw pointer table ---
249 alignas(uint64_t) std::array<
const uint8_t*, 1 << HASHLOG> tab = {};
253 return (sequence * 2654435761U) >> ((MINMATCH * 8) - HASHLOG);
262 putPositionOnHash(p, hashPosition(p), srcBase);
270 [[nodiscard]]
const uint8_t*
getPosition(
const uint8_t* p,
const uint8_t* srcBase)
const {
271 return getPositionOnHash(hashPosition(p), srcBase);
// Greedy LZ4 block compressor, parameterized on input size class (L64K) and
// architecture (ARCH64); the corresponding HashImpl variant is selected by
// these flags. Emits token / literals / 16-bit offset / extended lengths per
// the LZ4 block format. Many statements (labels, token offset write, loop
// heads) are elided in this fragment — comments below describe only what the
// visible lines establish.
275template<
bool L64K,
bool ARCH64>
280 const uint8_t* ip = src;
283 const uint8_t* anchor = src;
284 const uint8_t*
const iend = ip + inputSize;
// Match search must stop MFLIMIT-1 bytes before the end of input.
285 const uint8_t*
const mflimitPlusOne = iend - MFLIMIT + 1;
// Matches may not extend into the last LASTLITERALS bytes.
286 const uint8_t*
const matchlimit = iend - LASTLITERALS;
// Tiny inputs: encode everything as one literal run.
290 if (inputSize < MIN_LENGTH)
goto _last_literals;
293 hashTable.putPosition(ip, src);
295 forwardH = hashTable.hashPosition(ip);
299 const uint8_t*
match;
// Match search, pointer-table flavor (32-bit arch, large input).
300 if constexpr (!L64K && !ARCH64) {
301 const uint8_t* forwardIp = ip;
303 int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
305 uint32_t h = forwardH;
// Step grows by 1 every (1 << SKIP_TRIGGER) unproductive probes.
308 step = searchMatchNb++ >> SKIP_TRIGGER;
310 if (forwardIp > mflimitPlusOne) [[unlikely]]
goto _last_literals;
312 match = hashTable.getPositionOnHash(h, src);
313 forwardH = hashTable.hashPosition(forwardIp);
314 hashTable.putPositionOnHash(ip, h, src);
315 }
// Retry while the candidate is out of the DISTANCE_MAX window (or, per the
// elided condition, does not actually match MINMATCH bytes).
while ((
match + DISTANCE_MAX < ip) ||
// Match search, index-table flavor.
319 const uint8_t* forwardIp = ip;
321 int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
324 auto current = uint32_t(forwardIp - src);
325 auto matchIndex = hashTable.getIndexOnHash(h);
328 step = searchMatchNb++ >> SKIP_TRIGGER;
330 if (forwardIp > mflimitPlusOne) [[unlikely]]
goto _last_literals;
332 match = src + matchIndex;
333 forwardH = hashTable.hashPosition(forwardIp);
334 hashTable.putIndexOnHash(current, h);
// Reject candidates farther back than the 16-bit offset window.
336 if (!L64K && (matchIndex + DISTANCE_MAX < current)) {
// Extend the found match backwards over equal preceding bytes.
347 while (((ip > anchor) & (
match > src)) && ((ip[-1] ==
match[-1]))) {
// Emit the literal run since the last anchor: token high nibble, optional
// 255-continued extension bytes, then the literal bytes themselves.
353 auto litLength = unsigned(ip - anchor);
354 uint8_t* token = op++;
355 if (litLength >= RUN_MASK) {
356 int len = int(litLength - RUN_MASK);
357 *token = RUN_MASK << ML_BITS;
362 *op++ = uint8_t(len);
364 *token = uint8_t(litLength << ML_BITS);
368 wildCopy8(op, anchor, op + litLength);
// Measure the match beyond the first MINMATCH bytes and encode its length
// into the token low nibble plus 255-continued extension bytes.
382 unsigned matchCode =
count(ip + MINMATCH,
match + MINMATCH, matchlimit);
383 ip += size_t(matchCode) + MINMATCH;
385 if (matchCode >= ML_MASK) {
387 matchCode -= ML_MASK;
// Emit extension bytes four at a time (writes elided in this fragment).
389 while (matchCode >= 4 * 255) {
392 matchCode -= 4 * 255;
394 op += matchCode / 255;
395 *op++ = uint8_t(matchCode % 255);
397 *token += uint8_t(matchCode);
// Stop when too close to the end for another match.
403 if (ip >= mflimitPlusOne)
break;
// Refresh the table and immediately test the position after the match.
406 hashTable.putPosition(ip - 2, src);
409 if constexpr (!L64K && !ARCH64) {
410 match = hashTable.getPosition(ip, src);
411 hashTable.putPosition(ip, src);
418 auto h = hashTable.hashPosition(ip);
419 auto current = uint32_t(ip - src);
420 auto matchIndex = hashTable.getIndexOnHash(h);
421 match = src + matchIndex;
422 hashTable.putIndexOnHash(current, h);
423 if ((L64K || (matchIndex + DISTANCE_MAX >= current)) &&
// No immediate match: hash the next position and resume the search loop.
432 forwardH = hashTable.hashPosition(++ip);
// _last_literals: flush everything from anchor to iend as a final literal run.
437 auto lastRun = size_t(iend - anchor);
438 if (lastRun >= RUN_MASK) {
439 size_t accumulator = lastRun - RUN_MASK;
440 *op++ = RUN_MASK << ML_BITS;
441 while (accumulator >= 255) {
445 *op++ = uint8_t(accumulator);
447 *op++ = uint8_t(lastRun << ML_BITS);
449 memcpy(op, anchor, lastRun);
450 ip = anchor + lastRun;
// Return the number of compressed bytes written.
453 return int(op - dst);
// Public entry point: compress srcSize bytes from src into dst, returning the
// compressed size. Dispatches to the L64K template variant (compact 16-bit
// hash table) when the input is small enough that all offsets fit in 16 bits.
456int compress(
const uint8_t* src, uint8_t* dst,
int srcSize)
458 if (srcSize < LIMIT_64K) {
459 return compress_impl<true, LZ4_ARCH64>(src, dst, srcSize);
461 return compress_impl<false, LZ4_ARCH64>(src, dst, srcSize);
// Read an LZ4 extended-length field at *ip, advancing *ip past it and
// returning the accumulated value. Per the callers (`length +=
// read_variable_length(&ip)` after a RUN_MASK/ML_MASK token nibble) this is
// presumably the 255-continued byte sequence of the LZ4 format — the body is
// elided in this fragment, so confirm against the full source.
467static ALWAYS_INLINE unsigned read_variable_length(
const uint8_t** ip)
// Decompress an LZ4 block of compressedSize bytes from src into dst (capacity
// dstCapacity), returning the number of bytes produced. Structure: a fast loop
// using 32-byte wild copies while ample margin remains, falling back to
// labelled safe paths (safe_literal_copy / safe_match_copy) near the buffer
// ends. Loop heads, token reads and offset reads are elided in this fragment;
// comments describe only what the visible lines establish.
480int decompress(
const uint8_t* src, uint8_t* dst,
int compressedSize,
int dstCapacity)
482 const uint8_t* ip = src;
483 const uint8_t*
const iend = ip + compressedSize;
486 uint8_t*
const oend = op + dstCapacity;
// Thresholds for the combined short-literal/short-match shortcut below
// (14 = max un-extended literals; the other margins cover offset + copy slack).
490 const uint8_t*
const shortiend = iend - 14 - 2 ;
491 const uint8_t*
const shortoend = oend - 14 - 18 ;
493 const uint8_t*
match;
// --- fast loop: only entered with >= FASTLOOP_SAFE_DISTANCE output space ---
498 if ((oend - op) >= FASTLOOP_SAFE_DISTANCE) {
// Literal-run length from the token's high nibble, optionally extended.
503 length = token >> ML_BITS;
507 length += read_variable_length(&ip);
// Not enough margin for a 32-byte wild copy -> take the safe path.
511 if ((cpy > oend - 32) || (ip +
length > iend - 32)) {
512 goto safe_literal_copy;
514 wildCopy32(op, ip, cpy);
520 if (ip > iend - (16 + 1)) {
521 goto safe_literal_copy;
// Match length extension; bail to the safe copier near the output end.
538 length += read_variable_length(&ip);
540 if (op +
length >= oend - FASTLOOP_SAFE_DISTANCE) {
541 goto safe_match_copy;
545 if (op +
length >= oend - FASTLOOP_SAFE_DISTANCE) {
546 goto safe_match_copy;
// Short-match fast path: copy 18 bytes unconditionally (8+8+2).
552 memcpy(op + 0,
match + 0, 8);
553 memcpy(op + 8,
match + 8, 8);
554 memcpy(op + 16,
match + 16, 2);
// Overlapping (small-offset) matches need the pattern-stamping helper.
564 if (offset < 16) [[unlikely]] {
565 memcpy_using_offset(op,
match, cpy, offset);
567 wildCopy32(op,
match, cpy);
// --- safe (byte-accurate) main loop ---
577 length = token >> ML_BITS;
// Shortcut: un-extended literal run with comfortable input/output margins.
587 if ((
length != RUN_MASK) &&
589 ((ip < shortiend) & (op <= shortoend))) {
// Shortcut continues: un-extended match, offset >= 8, match inside dst.
603 if ((
length != ML_MASK) && (offset >= 8) && (
match >= dst)) {
605 memcpy(op + 0,
match + 0, 8);
606 memcpy(op + 8,
match + 8, 8);
607 memcpy(op + 16,
match + 16, 2);
620 length += read_variable_length(&ip);
// safe_literal_copy target: precise bounds checks before copying literals.
626 if ((((cpy > oend - MFLIMIT) || (ip +
length > iend - (2 + 1 + LASTLITERALS))))) {
636 wildCopy8(op, ip, cpy);
651 length += read_variable_length(&ip);
// safe_match_copy / overlap handling: table-driven realignment for offset < 8
// (same scheme as memcpy_using_offset_base).
659 if (offset < 8) [[unlikely]] {
665 match += inc32table[offset];
666 memcpy(op + 4,
match, 4);
667 match -= dec64table[offset];
669 memcpy(op,
match, 8);
// Near the output end, wild copies must stop early and finish byte-precisely.
674 if (cpy > oend - MATCH_SAFEGUARD_DISTANCE) [[unlikely]] {
675 uint8_t*
const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
676 if (op < oCopyLimit) {
677 wildCopy8(op,
match, oCopyLimit);
678 match += oCopyLimit - op;
685 memcpy(op,
match, 8);
687 wildCopy8(op + 8,
match + 8, cpy);
// Return the number of decompressed bytes written.
693 return int(op - dst);
ALWAYS_INLINE uint16_t unalignedLoad16(const void *p)
ALWAYS_INLINE void unalignedStore32(void *p, uint32_t v)
ALWAYS_INLINE uint32_t unalignedLoad32(const void *p)
ALWAYS_INLINE uint16_t read_UA_L16(const void *p)
ALWAYS_INLINE void write_UA_L16(void *p, uint16_t x)
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
int decompress(const uint8_t *src, uint8_t *dst, int compressedSize, int dstCapacity)
ALWAYS_INLINE int compress_impl(const uint8_t *src, uint8_t *const dst, const int inputSize)
int compress(const uint8_t *src, uint8_t *dst, int srcSize)
T length(const vecN< N, T > &x)
std::optional< bool > match(const BooleanInput &binding, const Event &event, std::function< int(int)> getJoyDeadZone)
void putPositionOnHash(const uint8_t *p, uint32_t h, const uint8_t *)
uint32_t getIndexOnHash(uint32_t) const
void putPosition(const uint8_t *p, const uint8_t *srcBase)
const uint8_t * getPosition(const uint8_t *p, const uint8_t *srcBase) const
const uint8_t * getPositionOnHash(uint32_t h, const uint8_t *) const
void putIndexOnHash(uint32_t, uint32_t)
static uint32_t hashPosition(const uint8_t *p)
const uint8_t * getPositionOnHash(uint32_t h, const uint8_t *srcBase) const
void putIndexOnHash(uint32_t idx, uint32_t h)
static uint32_t hashPosition(const uint8_t *p)
uint32_t getIndexOnHash(uint32_t h) const
void putPosition(const uint8_t *p, const uint8_t *srcBase)
void putPositionOnHash(const uint8_t *p, uint32_t h, const uint8_t *srcBase)
const uint8_t * getPosition(const uint8_t *p, const uint8_t *srcBase) const
uint32_t getIndexOnHash(uint32_t h) const
const uint8_t * getPosition(const uint8_t *p, const uint8_t *srcBase) const
void putPosition(const uint8_t *p, const uint8_t *srcBase)
void putIndexOnHash(uint32_t idx, uint32_t h)
const uint8_t * getPositionOnHash(uint32_t h, const uint8_t *srcBase) const
static uint32_t hashPosition(const uint8_t *p)
void putPositionOnHash(const uint8_t *p, uint32_t h, const uint8_t *srcBase)