static constexpr int MEMORY_USAGE = 14;
static constexpr int HASHLOG = MEMORY_USAGE - 2;
static constexpr int ACCELERATION = 1;
static constexpr int MINMATCH = 4;
static constexpr int WILDCOPYLENGTH = 8;
static constexpr int LASTLITERALS = 5;
static constexpr int MFLIMIT = 12;
static constexpr int MATCH_SAFEGUARD_DISTANCE = 2 * WILDCOPYLENGTH - MINMATCH;
static constexpr int FASTLOOP_SAFE_DISTANCE = 64;
static constexpr int MIN_LENGTH = MFLIMIT + 1;
static constexpr int DISTANCE_MAX = 65535;
static constexpr int ML_BITS = 4;
static constexpr int ML_MASK = (1 << ML_BITS) - 1;
static constexpr int RUN_BITS = 8 - ML_BITS;
static constexpr int RUN_MASK = (1 << RUN_BITS) - 1;
static constexpr int LIMIT_64K = 0x10000 + (MFLIMIT - 1);
static constexpr uint32_t SKIP_TRIGGER = 6;
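// Each encoded sequence starts with a 1-byte token: the high RUN_BITS bits hold
// the literal-run length and the low ML_BITS bits hold the match length; a
// nibble equal to its mask (RUN_MASK / ML_MASK) is extended with extra bytes.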
// Assumed definitions for identifiers used below: 64-bit host detection and a
// machine-word sized type for the wide reads.
static constexpr bool LZ4_ARCH64 = sizeof(void*) == 8;
using reg_t = uintptr_t;

static constexpr int STEPSIZE = sizeof(reg_t);
[[nodiscard]] static reg_t read_ARCH(const uint8_t* p)
{
	reg_t val;
	memcpy(&val, p, sizeof(val));
	return val;
}
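// Helpers assumed here for the unaligned loads used further down (the original
// presumably provides equivalents): a native-endian 32-bit read and a
// little-endian 16-bit read.
[[nodiscard]] static uint32_t read32(const uint8_t* p)
{
	uint32_t val;
	memcpy(&val, p, sizeof(val));
	return val;
}

[[nodiscard]] static uint16_t readLE16(const uint8_t* p)
{
	return uint16_t(p[0] | (p[1] << 8));
}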
static void wildCopy8(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst, src, 8); // may write up to 8 bytes beyond dstEnd
		dst += 8;
		src += 8;
	} while (dst < dstEnd);
}
static void wildCopy32(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst +  0, src +  0, 16); // may write up to 32 bytes beyond dstEnd
		memcpy(dst + 16, src + 16, 16);
		dst += 32;
		src += 32;
	} while (dst < dstEnd);
}
static constexpr std::array<unsigned, 8> inc32table = {0, 1, 2, 1, 0, 4, 4, 4};
static constexpr std::array<int     , 8> dec64table = {0, 0, 0, -1, -4, 1, 2, 3};
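// Helper tables for copying overlapping matches with offset < 8: after the
// first four byte-wise copies, inc32table[offset] advances the source so that
// the next 4-byte copy reads a whole number of periods behind, and
// dec64table[offset] re-adjusts it so the following 8-byte copies keep
// reproducing the repeating pattern.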
static void memcpy_using_offset_base(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, const size_t offset)
{
	if (offset < 8) {
		dstPtr[0] = srcPtr[0];
		dstPtr[1] = srcPtr[1];
		dstPtr[2] = srcPtr[2];
		dstPtr[3] = srcPtr[3];
		srcPtr += inc32table[offset];
		memcpy(dstPtr + 4, srcPtr, 4);
		srcPtr -= dec64table[offset];
		dstPtr += 8;
	} else {
		memcpy(dstPtr, srcPtr, 8);
		dstPtr += 8;
		srcPtr += 8;
	}
	wildCopy8(dstPtr, srcPtr, dstEnd);
}
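// Copy an overlapping match with a small offset (the fast decoder uses this
// for offsets < 16): offsets 1, 2 and 4 are expanded into one repeating 8-byte
// pattern, other offsets fall back to memcpy_using_offset_base(). Requires at
// least 8 writable bytes beyond dstEnd.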
static void memcpy_using_offset(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, size_t offset)
{
	std::array<uint8_t, 8> v;
	switch (offset) {
	case 1: // repeat a single byte
		memset(v.data(), *srcPtr, 8);
		break;
	case 2: // repeat a 2-byte pattern
		memcpy(&v[0], srcPtr, 2);
		memcpy(&v[2], srcPtr, 2);
		memcpy(&v[4], &v[0], 4);
		break;
	case 4: // repeat a 4-byte pattern
		memcpy(&v[0], srcPtr, 4);
		memcpy(&v[4], srcPtr, 4);
		break;
	default:
		memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
		return;
	}
	memcpy(dstPtr, v.data(), 8);
	dstPtr += 8;
	while (dstPtr < dstEnd) {
		memcpy(dstPtr, v.data(), 8);
		dstPtr += 8;
	}
}
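// Given the XOR of two equal-length words, return how many of their first
// bytes (in memory order) are equal: trailing zero bytes on little-endian
// hosts, leading zero bytes on big-endian hosts.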
[[nodiscard]] static inline int NbCommonBytes(size_t val)
{
	if constexpr (std::endian::native == std::endian::big) {
		return std::countl_zero(val) >> 3;
	} else {
		return std::countr_zero(val) >> 3;
	}
}
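// Length of the common prefix of pIn and pMatch, comparing STEPSIZE bytes at a
// time; never reads at or beyond pInLimit.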
[[nodiscard]] static ALWAYS_INLINE unsigned count(const uint8_t* pIn, const uint8_t* pMatch, const uint8_t* pInLimit)
{
	const uint8_t* const pStart = pIn;

	if (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (diff) return NbCommonBytes(diff);
		pIn    += STEPSIZE;
		pMatch += STEPSIZE;
	}
	while (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn    += STEPSIZE;
			pMatch += STEPSIZE;
			continue;
		}
		pIn += NbCommonBytes(diff);
		return unsigned(pIn - pStart);
	}
	// Tail: fewer than STEPSIZE bytes remain, compare them byte by byte
	while ((pIn < pInLimit) && (*pMatch == *pIn)) {
		pIn++;
		pMatch++;
	}
	return unsigned(pIn - pStart);
}
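// Hash table used by the compressor to find recent occurrences of 4-byte
// sequences. Three layouts, selected via the template parameters:
//   HashImpl<true,  *>     - input < 64 kB: 16-bit offsets from the buffer start
//   HashImpl<false, true>  - 64-bit hosts:  32-bit indices, hash over 5 bytes
//   HashImpl<false, false> - 32-bit hosts:  raw pointers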
template<bool L64K, bool ARCH64> struct HashImpl;

// Inputs smaller than 64 kB: positions fit in 16 bits.
template<bool ARCH64>
struct HashImpl<true, ARCH64> {
	alignas(uint64_t) std::array<uint16_t, 1 << (HASHLOG + 1)> tab = {};

	[[nodiscard]] uint32_t hashPosition(const uint8_t* p) const {
		uint32_t sequence = read32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - (HASHLOG + 1));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) { tab[h] = uint16_t(idx); }
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint16_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const { return tab[h]; }
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};
// Larger inputs on 64-bit hosts: 32-bit indices, hash over 5 bytes.
template<>
struct HashImpl<false, true> {
	alignas(uint64_t) std::array<uint32_t, 1 << HASHLOG> tab = {};

	[[nodiscard]] uint32_t hashPosition(const uint8_t* p) const {
		uint64_t sequence = read_ARCH(p);
		const uint64_t prime = (std::endian::native == std::endian::big)
			? 11400714785074694791ULL // 8-byte prime
			:        889523592379ULL; // 5-byte prime
		return uint32_t(((sequence << 24) * prime) >> (64 - HASHLOG));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) { tab[h] = idx; }
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint32_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const { return tab[h]; }
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};
// Larger inputs on 32-bit hosts: the table stores pointers directly (the
// index-based accessors are never instantiated for this variant).
template<>
struct HashImpl<false, false> {
	alignas(uint64_t) std::array<const uint8_t*, 1 << HASHLOG> tab = {};
	[[nodiscard]] uint32_t hashPosition(const uint8_t* p) const {
		uint32_t sequence = read32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - HASHLOG);
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* /*srcBase*/) {
		tab[h] = p;
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* /*srcBase*/) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};
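// Greedy LZ4 block compression: scan the input, look up the last position that
// hashed to the same 4-byte sequence, and when it really matches (and lies no
// further than DISTANCE_MAX back) emit a token, the pending literals, the
// 16-bit offset and the match length.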
template<bool L64K, bool ARCH64>
static ALWAYS_INLINE int compress_impl(const uint8_t* src, uint8_t* const dst, const int inputSize)
{
	HashImpl<L64K, ARCH64> hashTable;

	const uint8_t* ip = src;
	uint8_t* op = dst;

	const uint8_t* anchor = src;
	const uint8_t* const iend = ip + inputSize;
	const uint8_t* const mflimitPlusOne = iend - MFLIMIT + 1;
	const uint8_t* const matchlimit = iend - LASTLITERALS;

	uint32_t forwardH;

	// Input too small to contain any match: emit everything as literals
	if (inputSize < MIN_LENGTH) goto _last_literals;

	// First byte
	hashTable.putPosition(ip, src);
	ip++;
	forwardH = hashTable.hashPosition(ip);

	// Main loop
	while (true) {
		// Find a match
		const uint8_t* match;
		if constexpr (!L64K && !ARCH64) {
			// The hash table stores pointers
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			do {
				uint32_t h = forwardH;
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (forwardIp > mflimitPlusOne) [[unlikely]] goto _last_literals;

				match = hashTable.getPositionOnHash(h, src);
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putPositionOnHash(ip, h, src);
			} while ((match + DISTANCE_MAX < ip) ||
			         (read32(match) != read32(ip)));
		} else {
			// The hash table stores indices
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			while (true) {
				uint32_t h = forwardH;
				auto current = uint32_t(forwardIp - src);
				auto matchIndex = hashTable.getIndexOnHash(h);
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (forwardIp > mflimitPlusOne) [[unlikely]] goto _last_literals;

				match = src + matchIndex;
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putIndexOnHash(current, h);

				if (!L64K && (matchIndex + DISTANCE_MAX < current)) {
					continue; // match too far away
				}
				if (read32(match) == read32(ip)) break; // match confirmed
			}
		}
		// Catch up: extend the match backwards over equal preceding bytes
		while (((ip > anchor) & (match > src)) && (ip[-1] == match[-1])) {
			ip--; match--;
		}
		// Encode the literal run
		auto litLength = unsigned(ip - anchor);
		uint8_t* token = op++;
		if (litLength >= RUN_MASK) {
			auto len = int(litLength - RUN_MASK);
			*token = RUN_MASK << ML_BITS;
			while (len >= 255) { *op++ = 255; len -= 255; }
			*op++ = uint8_t(len);
		} else {
			*token = uint8_t(litLength << ML_BITS);
		}
		// Copy the literals (may write up to 8 bytes past their end)
		wildCopy8(op, anchor, op + litLength);
		op += litLength;
_next_match:
		// Encode the offset (2 bytes, little endian), then the match length
		auto offset = uint16_t(ip - match);
		op[0] = uint8_t(offset); op[1] = uint8_t(offset >> 8); op += 2;
		unsigned matchCode = count(ip + MINMATCH, match + MINMATCH, matchlimit);
		ip += size_t(matchCode) + MINMATCH;
		if (matchCode >= ML_MASK) {
			*token += ML_MASK;
			matchCode -= ML_MASK;
			memset(op, 255, 4);
			while (matchCode >= 4 * 255) {
				op += 4;
				memset(op, 255, 4);
				matchCode -= 4 * 255;
			}
			op += matchCode / 255;
			*op++ = uint8_t(matchCode % 255);
		} else {
			*token += uint8_t(matchCode);
		}
		anchor = ip;
		if (ip >= mflimitPlusOne) break; // too close to the end of the input
		hashTable.putPosition(ip - 2, src); // fill the table for a skipped position
		// Test next position: try to chain another match right away
		if constexpr (!L64K && !ARCH64) {
			match = hashTable.getPosition(ip, src);
			hashTable.putPosition(ip, src);
			if ((match + DISTANCE_MAX >= ip) && (read32(match) == read32(ip))) {
				token = op++; *token = 0; goto _next_match;
			}
		} else {
			auto h = hashTable.hashPosition(ip);
			auto current = uint32_t(ip - src);
			auto matchIndex = hashTable.getIndexOnHash(h);
			match = src + matchIndex;
			hashTable.putIndexOnHash(current, h);
			if ((L64K || (matchIndex + DISTANCE_MAX >= current)) &&
			    (read32(match) == read32(ip))) {
				token = op++; *token = 0; goto _next_match;
			}
		}
		// Prepare for the next iteration
		forwardH = hashTable.hashPosition(++ip);
	}

_last_literals:
	// Encode the trailing literals
	{
		auto lastRun = size_t(iend - anchor);
		if (lastRun >= RUN_MASK) {
			size_t accumulator = lastRun - RUN_MASK;
			*op++ = RUN_MASK << ML_BITS;
			while (accumulator >= 255) { *op++ = 255; accumulator -= 255; }
			*op++ = uint8_t(accumulator);
		} else {
			*op++ = uint8_t(lastRun << ML_BITS);
		}
		memcpy(op, anchor, lastRun);
		ip = anchor + lastRun;
		op += lastRun;
	}
	return int(op - dst);
}
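// Dispatch on input size: inputs below LIMIT_64K can use the compact 16-bit
// hash table, larger ones the index (64-bit hosts) or pointer (32-bit hosts)
// variant.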
int compress(const uint8_t* src, uint8_t* dst, int srcSize)
{
	if (srcSize < LIMIT_64K) {
		return compress_impl<true, LZ4_ARCH64>(src, dst, srcSize);
	} else {
		return compress_impl<false, LZ4_ARCH64>(src, dst, srcSize);
	}
}
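// Note: compress() writes without output-bound checks, so the caller must size
// 'dst' for the worst case; LZ4's classic bound is srcSize + srcSize/255 + 16.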
// Read a variable-length integer: a run of 255-bytes plus one final byte, summed.
static ALWAYS_INLINE unsigned read_variable_length(const uint8_t** ip)
{
	unsigned length = 0;
	unsigned s;
	do {
		s = **ip;
		(*ip)++;
		length += s;
	} while (s == 255);
	return length;
}
int decompress(const uint8_t* src, uint8_t* dst, int compressedSize, int dstCapacity)
{
	const uint8_t* ip = src;
	const uint8_t* const iend = ip + compressedSize;

	uint8_t* op = dst;
	uint8_t* const oend = op + dstCapacity;
	uint8_t* cpy;

	// End pointers for the "short sequence" shortcut in the main loop below
	const uint8_t* const shortiend = iend - 14 /*max literals*/ - 2 /*offset*/;
	const uint8_t* const shortoend = oend - 14 /*max literals*/ - 18 /*max match*/;

	const uint8_t* match;
	size_t offset;
	unsigned token;
	size_t length;
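	// Decoding runs in two phases: a fast loop that may freely over-copy while
	// the output cursor stays at least FASTLOOP_SAFE_DISTANCE away from oend,
	// and a careful main loop (plus the safe_* labels shared with the fast
	// loop) for the final bytes.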
	// Fast loop
	if ((oend - op) >= FASTLOOP_SAFE_DISTANCE) {
		while (true) {
			token = *ip++;
			length = token >> ML_BITS; // literal length
			if (length == RUN_MASK) {
				length += read_variable_length(&ip);
				cpy = op + length;
				if ((cpy > oend - 32) || (ip + length > iend - 32)) { goto safe_literal_copy; }
				wildCopy32(op, ip, cpy);
				ip += length; op = cpy;
			} else {
				cpy = op + length;
				if (ip > iend - (16 + 1)) { goto safe_literal_copy; }
				memcpy(op, ip, 16); // a short literal run is at most 14 bytes
				ip += length; op = cpy;
			}
			// Get the offset, locate the match, decode the match length
			offset = readLE16(ip); ip += 2;
			match = op - offset;
			length = token & ML_MASK;
			if (length == ML_MASK) {
				length += read_variable_length(&ip) + MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { goto safe_match_copy; }
			} else {
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { goto safe_match_copy; }
				// Fast path: short, non-overlapping match, copy 18 bytes at once
				if ((match >= dst) && (offset >= 8)) {
					memcpy(op +  0, match +  0, 8);
					memcpy(op +  8, match +  8, 8);
					memcpy(op + 16, match + 16, 2);
					op += length;
					continue;
				}
			}
			// Copy the match within the block
			cpy = op + length;
			if (offset < 16) [[unlikely]] {
				memcpy_using_offset(op, match, cpy, offset);
			} else {
				wildCopy32(op, match, cpy);
			}
			op = cpy; // wild-copy correction
		}
	}

	// Main loop: decode the remaining sequences, close to the end of the output
	while (true) {
		token = *ip++;
		length = token >> ML_BITS; // literal length

		// Shortcut: short literals followed by a short, non-overlapping match
		if ((length != RUN_MASK) &&
		    ((ip < shortiend) & (op <= shortoend))) {
			memcpy(op, ip, 16); // the literal run is at most 14 bytes
			op += length; ip += length;
			length = token & ML_MASK; // match length
			offset = readLE16(ip); ip += 2;
			match = op - offset;
			if ((length != ML_MASK) && (offset >= 8) && (match >= dst)) {
				memcpy(op +  0, match +  0, 8);
				memcpy(op +  8, match +  8, 8);
				memcpy(op + 16, match + 16, 2);
				op += length + MINMATCH;
				continue;
			}
			goto _copy_match; // length/offset/match are already decoded
		}

		// Decode the literal length, then copy the literals
		if (length == RUN_MASK) {
			length += read_variable_length(&ip);
		}
		cpy = op + length;
safe_literal_copy:
		if ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS))) {
			// Last sequence of the block: copy the remaining literals and stop
			memmove(op, ip, length);
			ip += length;
			op += length;
			break;
		}
		wildCopy8(op, ip, cpy); // may write up to 8 bytes past cpy
		ip += length; op = cpy;

		// Get the offset, locate the match, then decode the match length
		offset = readLE16(ip); ip += 2;
		match = op - offset;
		length = token & ML_MASK;

_copy_match:
		if (length == ML_MASK) {
			length += read_variable_length(&ip);
		}
		length += MINMATCH;

safe_match_copy:
		cpy = op + length;
		if (offset < 8) [[unlikely]] {
			// Overlapping match with a tiny offset, same technique as
			// memcpy_using_offset_base()
			op[0] = match[0]; op[1] = match[1];
			op[2] = match[2]; op[3] = match[3];
			match += inc32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64table[offset];
		} else {
			memcpy(op, match, 8);
			match += 8;
		}
		op += 8;

		if (cpy > oend - MATCH_SAFEGUARD_DISTANCE) [[unlikely]] {
			// Too close to the end of the buffer for wild copies: finish carefully
			uint8_t* const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
			if (op < oCopyLimit) {
				wildCopy8(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}
			while (op < cpy) { *op++ = *match++; }
		} else {
			memcpy(op, match, 8);
			if (length > 16) wildCopy8(op + 8, match + 8, cpy);
		}
		op = cpy; // wild-copy correction
	}

	return int(op - dst); // number of bytes written to dst
}