// openMSX - lz4.cc
#include "lz4.hh"

#include "aligned.hh"
#include "endian.hh"
#include "inline.hh"
#include "unreachable.hh"
#include <array>
#include <bit>
#include <cstring>

#ifdef _MSC_VER
# include <intrin.h>
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
# pragma warning(disable : 4293) // disable: C4293: too large shift (32-bits)
#endif

// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64))
# define LZ4_ARCH64 1
#else
# define LZ4_ARCH64 0
#endif

namespace LZ4 {

static constexpr int MEMORY_USAGE = 14;
static constexpr int HASHLOG = MEMORY_USAGE - 2;
static constexpr int HASH_SIZE_U32 = 1 << HASHLOG;
static constexpr int ACCELERATION = 1;
static constexpr int MINMATCH = 4;
static constexpr int WILDCOPYLENGTH = 8;
static constexpr int LASTLITERALS = 5; // see ../doc/lz4_Block_format.md#parsing-restrictions
static constexpr int MFLIMIT = 12; // see ../doc/lz4_Block_format.md#parsing-restrictions
static constexpr int MATCH_SAFEGUARD_DISTANCE = 2 * WILDCOPYLENGTH - MINMATCH; // ensure it's possible to write 2 x wildcopyLength without overflowing output buffer
static constexpr int FASTLOOP_SAFE_DISTANCE = 64;
static constexpr int MIN_LENGTH = MFLIMIT + 1;
static constexpr int DISTANCE_MAX = 65535;
static constexpr int ML_BITS = 4;
static constexpr int ML_MASK = (1 << ML_BITS) - 1;
static constexpr int RUN_BITS = 8 - ML_BITS;
static constexpr int RUN_MASK = (1 << RUN_BITS) - 1;
static constexpr int LIMIT_64K = 0x10000 + (MFLIMIT - 1);
static constexpr uint32_t SKIP_TRIGGER = 6; // increasing this value makes compression run slower on incompressible data
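
// For illustration: ML_BITS/RUN_BITS describe how a sequence token packs both
// lengths into a single byte, per the LZ4 block format; the decoder below
// does exactly this:
//
//	unsigned literalLength = token >> ML_BITS;             // upper 4 bits; RUN_MASK (15) means "more length bytes follow"
//	unsigned matchLength   = (token & ML_MASK) + MINMATCH; // lower 4 bits, biased by MINMATCH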

using reg_t = uintptr_t;
static constexpr int STEPSIZE = sizeof(reg_t);


[[nodiscard]] static reg_t read_ARCH(const uint8_t* p)
{
	reg_t val;
	memcpy(&val, p, sizeof(val));
	return val;
}

// customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd
static void wildCopy8(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst, src, 8);
		dst += 8;
		src += 8;
	} while (dst < dstEnd);
}

// customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
// this version copies two times 16 bytes (instead of one time 32 bytes)
// because it must be compatible with offsets >= 16.
static void wildCopy32(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst +  0, src +  0, 16);
		memcpy(dst + 16, src + 16, 16);
		dst += 32;
		src += 32;
	} while (dst < dstEnd);
}

static constexpr std::array<unsigned, 8> inc32table = {0, 1, 2, 1, 0, 4, 4, 4};
static constexpr std::array<int,      8> dec64table = {0, 0, 0, -1, -4, 1, 2, 3};
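
// For illustration: these tables let the decoder turn an overlapping match
// with offset < 8 into fixed-size copies. After the first 4 bytes are copied
// one at a time, inc32table[offset] advances srcPtr so the next 4-byte memcpy
// stays in phase with the repeating pattern, and dec64table[offset] then
// rewinds srcPtr so the remaining wildCopy8 runs with an effective offset of
// at least 8. E.g. offset == 2: inc32table[2] == 2 keeps the 2-byte pattern
// aligned, and dec64table[2] == 0 needs no correction.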

static void memcpy_using_offset_base(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, const size_t offset)
{
	if (offset < 8) {
		dstPtr[0] = srcPtr[0];
		dstPtr[1] = srcPtr[1];
		dstPtr[2] = srcPtr[2];
		dstPtr[3] = srcPtr[3];
		srcPtr += inc32table[offset];
		memcpy(dstPtr + 4, srcPtr, 4);
		srcPtr -= dec64table[offset];
		dstPtr += 8;
	} else {
		memcpy(dstPtr, srcPtr, 8);
		dstPtr += 8;
		srcPtr += 8;
	}

	wildCopy8(dstPtr, srcPtr, dstEnd);
}

// memcpy_using_offset() presumes:
// - dstEnd >= dstPtr + MINMATCH
// - there are at least 8 bytes available to write after dstEnd
static void memcpy_using_offset(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, size_t offset)
{
	std::array<uint8_t, 8> v;

	unalignedStore32(dstPtr, 0); // silence an msan warning when offset==0

	switch (offset) {
	case 1:
		memset(v.data(), *srcPtr, 8);
		break;
	case 2:
		memcpy(&v[0], srcPtr, 2);
		memcpy(&v[2], srcPtr, 2);
		memcpy(&v[4], &v[0], 4);
		break;
	case 4:
		memcpy(&v[0], srcPtr, 4);
		memcpy(&v[4], srcPtr, 4);
		break;
	default:
		memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
		return;
	}

	memcpy(dstPtr, v.data(), 8);
	dstPtr += 8;
	while (dstPtr < dstEnd) {
		memcpy(dstPtr, v.data(), 8);
		dstPtr += 8;
	}
}
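
// Worked example for the cases above: for offset == 2 the staging buffer v
// becomes {s0,s1,s0,s1,s0,s1,s0,s1}; since 2 divides 8, stamping v repeatedly
// reproduces the pattern exactly. Only the "nice" offsets 1, 2 and 4 are
// handled this way; all others fall back to memcpy_using_offset_base().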

[[nodiscard]] static inline int NbCommonBytes(size_t val)
{
	if (val == 0) UNREACHABLE;
	if constexpr (Endian::BIG) {
		return std::countl_zero(val) >> 3;
	} else {
		return std::countr_zero(val) >> 3;
	}
}
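
// Worked example: 'val' is the XOR of two word-sized loads, so equal leading
// bytes show up as zero bytes at the little-endian low end. If the first
// three bytes match and the fourth differs, countr_zero(val) lies in
// [24, 31], and '>> 3' yields the 3 common bytes (big-endian uses
// countl_zero for the same effect).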

[[nodiscard]] ALWAYS_INLINE unsigned count(const uint8_t* pIn, const uint8_t* pMatch, const uint8_t* pInLimit)
{
	const uint8_t* const pStart = pIn;

	if (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
		} else {
			return NbCommonBytes(diff);
		}
	}
	while (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
			continue;
		}
		pIn += NbCommonBytes(diff);
		return unsigned(pIn - pStart);
	}

	if ((STEPSIZE == 8) && (pIn < (pInLimit - 3)) && (unalignedLoad32(pMatch) == unalignedLoad32(pIn))) {
		pIn += 4;
		pMatch += 4;
	}
	if ((pIn < (pInLimit - 1)) && (unalignedLoad16(pMatch) == unalignedLoad16(pIn))) {
		pIn += 2;
		pMatch += 2;
	}
	if ((pIn < pInLimit) && (*pMatch == *pIn)) {
		pIn += 1;
	}
	return unsigned(pIn - pStart);
}


template<bool L64K, bool ARCH64> struct HashImpl;

// byU16
template<bool ARCH64> struct HashImpl<true, ARCH64> {
	alignas(uint64_t) std::array<uint16_t, 1 << (HASHLOG + 1)> tab = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - (HASHLOG + 1));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = uint16_t(idx);
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint16_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byU32
template<> struct HashImpl<false, true> {
	alignas(uint64_t) std::array<uint32_t, 1 << HASHLOG> tab = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint64_t sequence = read_ARCH(p);
		const uint64_t prime = Endian::BIG
			? 11400714785074694791ULL // 8 bytes
			: 889523592379ULL;        // 5 bytes
		return uint32_t(((sequence << 24) * prime) >> (64 - HASHLOG));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = idx;
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint32_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byPtr
template<> struct HashImpl<false, false> {
	alignas(uint64_t) std::array<const uint8_t*, 1 << HASHLOG> tab = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - HASHLOG);
	}
	void putIndexOnHash(uint32_t /*idx*/, uint32_t /*h*/) {
		UNREACHABLE; // a byPtr table never stores indices
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* /*srcBase*/) {
		tab[h] = p;
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t /*h*/) const {
		UNREACHABLE; return 0;
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* /*srcBase*/) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};
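
// compress() below selects one of the three table variants, mirroring the
// byU16/byU32/byPtr strategies of the reference implementation:
//   HashImpl<true,  *>     - byU16: 16-bit indices, input < LIMIT_64K
//   HashImpl<false, true>  - byU32: 32-bit indices, 64-bit hosts
//   HashImpl<false, false> - byPtr: raw pointers,   32-bit hosts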

template<bool L64K, bool ARCH64>
ALWAYS_INLINE int compress_impl(const uint8_t* src, uint8_t* const dst, const int inputSize)
{
	HashImpl<L64K, ARCH64> hashTable;

	const uint8_t* ip = src;
	uint8_t* op = dst;

	const uint8_t* anchor = src;
	const uint8_t* const iend = ip + inputSize;
	const uint8_t* const mflimitPlusOne = iend - MFLIMIT + 1;
	const uint8_t* const matchlimit = iend - LASTLITERALS;

	uint32_t forwardH;

	if (inputSize < MIN_LENGTH) goto _last_literals; // Input too small, no compression (all literals)

	// First byte
	hashTable.putPosition(ip, src);
	ip++;
	forwardH = hashTable.hashPosition(ip);

	while (true) {
		// Find a match
		const uint8_t* match;
		if constexpr (!L64K && !ARCH64) { // byPtr
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			do {
				uint32_t h = forwardH;
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (forwardIp > mflimitPlusOne) [[unlikely]] goto _last_literals;

				match = hashTable.getPositionOnHash(h, src);
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putPositionOnHash(ip, h, src);
			} while ((match + DISTANCE_MAX < ip) ||
			         (unalignedLoad32(match) != unalignedLoad32(ip)));

		} else { // byU16 or byU32
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			while (true) {
				auto h = forwardH;
				auto current = uint32_t(forwardIp - src);
				auto matchIndex = hashTable.getIndexOnHash(h);
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (forwardIp > mflimitPlusOne) [[unlikely]] goto _last_literals;

				match = src + matchIndex;
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putIndexOnHash(current, h);

				if (!L64K && (matchIndex + DISTANCE_MAX < current)) {
					continue; // too far
				}

				if (unalignedLoad32(match) == unalignedLoad32(ip)) {
					break; // match found
				}
			}
		}

		// Catch up
		while (((ip > anchor) & (match > src)) && (/*unlikely*/(ip[-1] == match[-1]))) {
			ip--;
			match--;
		}

		// Encode Literals
		auto litLength = unsigned(ip - anchor);
		uint8_t* token = op++;
		if (litLength >= RUN_MASK) {
			int len = int(litLength - RUN_MASK);
			*token = RUN_MASK << ML_BITS;
			while (len >= 255) {
				*op++ = 255;
				len -= 255;
			}
			*op++ = uint8_t(len);
		} else {
			*token = uint8_t(litLength << ML_BITS);
		}

		// Copy Literals
		wildCopy8(op, anchor, op + litLength);
		op += litLength;

_next_match:
		// At this stage, the following variables must be correctly set:
		// - ip : at start of LZ operation
		// - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
		// - token and *token : position to write the 4 match-length bits; the higher 4 bits for literal length are assumed to be already written

		// Encode Offset
		Endian::write_UA_L16(op, uint16_t(ip - match));
		op += 2;

		// Encode MatchLength
		unsigned matchCode = count(ip + MINMATCH, match + MINMATCH, matchlimit);
		ip += size_t(matchCode) + MINMATCH;

		if (matchCode >= ML_MASK) {
			*token += ML_MASK;
			matchCode -= ML_MASK;
			unalignedStore32(op, 0xFFFFFFFF);
			while (matchCode >= 4 * 255) {
				op += 4;
				unalignedStore32(op, 0xFFFFFFFF);
				matchCode -= 4 * 255;
			}
			op += matchCode / 255;
			*op++ = uint8_t(matchCode % 255);
		} else {
			*token += uint8_t(matchCode);
		}

		anchor = ip;

		// Test end of chunk
		if (ip >= mflimitPlusOne) break;

		// Fill table
		hashTable.putPosition(ip - 2, src);

		// Test next position
		if constexpr (!L64K && !ARCH64) { // byPtr
			match = hashTable.getPosition(ip, src);
			hashTable.putPosition(ip, src);
			if ((match + DISTANCE_MAX >= ip) && (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		} else { // byU16 or byU32
			auto h = hashTable.hashPosition(ip);
			auto current = uint32_t(ip - src);
			auto matchIndex = hashTable.getIndexOnHash(h);
			match = src + matchIndex;
			hashTable.putIndexOnHash(current, h);
			if ((L64K || (matchIndex + DISTANCE_MAX >= current)) &&
			    (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		}

		// Prepare next loop
		forwardH = hashTable.hashPosition(++ip);
	}

_last_literals:
	// Encode Last Literals
	auto lastRun = size_t(iend - anchor);
	if (lastRun >= RUN_MASK) {
		size_t accumulator = lastRun - RUN_MASK;
		*op++ = RUN_MASK << ML_BITS;
		while (accumulator >= 255) {
			*op++ = 255;
			accumulator -= 255;
		}
		*op++ = uint8_t(accumulator);
	} else {
		*op++ = uint8_t(lastRun << ML_BITS);
	}
	memcpy(op, anchor, lastRun);
	ip = anchor + lastRun;
	op += lastRun;

	return int(op - dst);
}

int compress(const uint8_t* src, uint8_t* dst, int srcSize)
{
	if (srcSize < LIMIT_64K) {
		return compress_impl<true, LZ4_ARCH64>(src, dst, srcSize);
	} else {
		return compress_impl<false, LZ4_ARCH64>(src, dst, srcSize);
	}
}
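
// Usage sketch (illustrative; not called from this file). compress() performs
// no output-bounds checking, so the destination must be sized for
// incompressible input; 'srcSize + srcSize/255 + 16' is assumed here, which
// matches the classic LZ4_compressBound() formula:
//
//	std::vector<uint8_t> out(in.size() + in.size() / 255 + 16);
//	int packedSize = LZ4::compress(in.data(), out.data(), int(in.size()));
//	out.resize(packedSize);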



static ALWAYS_INLINE unsigned read_variable_length(const uint8_t** ip)
{
	unsigned length = 0;
	unsigned s;
	do {
		s = **ip;
		(*ip)++;
		length += s;
	} while (s == 255);

	return length;
}
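
// Worked example: the byte sequence FF FF 03 decodes to 255 + 255 + 3 = 513;
// every 0xFF byte means "keep accumulating", and the first byte below 255
// terminates the length.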

int decompress(const uint8_t* src, uint8_t* dst, int compressedSize, int dstCapacity)
{
	const uint8_t* ip = src;
	const uint8_t* const iend = ip + compressedSize;

	uint8_t* op = dst;
	uint8_t* const oend = op + dstCapacity;
	uint8_t* cpy;

	// Set up the "end" pointers for the shortcut.
	const uint8_t* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
	const uint8_t* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;

	const uint8_t* match;
	size_t offset;
	unsigned token;
	size_t length;

	if ((oend - op) >= FASTLOOP_SAFE_DISTANCE) {
		// Fast loop: decode sequences as long as output < oend - FASTLOOP_SAFE_DISTANCE
		while (true) {
			// Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE
			token = *ip++;
			length = token >> ML_BITS; // literal length

			// decode literal length
			if (length == RUN_MASK) {
				length += read_variable_length(&ip);

				// copy literals
				cpy = op + length;
				if ((cpy > oend - 32) || (ip + length > iend - 32)) {
					goto safe_literal_copy;
				}
				wildCopy32(op, ip, cpy);
				ip += length;
				op = cpy;
			} else {
				cpy = op + length;
				// We don't need to check oend, since we check it once for each loop below
				if (ip > iend - (16 + 1/*max lit + offset + nextToken*/)) {
					goto safe_literal_copy;
				}
				// Literals can only be <= 14 bytes here, but hope compilers optimize if we copy by a register size
				memcpy(op, ip, 16);
				ip += length;
				op = cpy;
			}

			// get offset
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// get matchlength
			length = token & ML_MASK;

			if (length == ML_MASK) {
				length += read_variable_length(&ip);
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}
			} else {
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}

				// Fastpath check: Avoids a branch in wildCopy32 if true
				if (match >= dst) {
					if (offset >= 8) {
						memcpy(op +  0, match +  0, 8);
						memcpy(op +  8, match +  8, 8);
						memcpy(op + 16, match + 16, 2);
						op += length;
						continue;
					}
				}
			}

			// copy match within block
			cpy = op + length;

			if (offset < 16) [[unlikely]] {
				memcpy_using_offset(op, match, cpy, offset);
			} else {
				wildCopy32(op, match, cpy);
			}

			op = cpy; // wildcopy correction
		}
	}

	// Main Loop: decode remaining sequences, where fewer than FASTLOOP_SAFE_DISTANCE output bytes remain
	while (true) {
		token = *ip++;
		length = token >> ML_BITS; // literal length

		// A two-stage shortcut for the most common case:
		// 1) If the literal length is 0..14, and there is enough space,
		//    enter the shortcut and copy 16 bytes on behalf of the literals
		//    (in the fast mode, only 8 bytes can be safely copied this way).
		// 2) Further if the match length is 4..18, copy 18 bytes in a similar
		//    manner; but we ensure that there's enough space in the output for
		//    those 18 bytes earlier, upon entering the shortcut (in other words,
		//    there is a combined check for both stages).
		if ((length != RUN_MASK) &&
		    // strictly "less than" on input, to re-enter the loop with at least one byte
		    /*likely*/((ip < shortiend) & (op <= shortoend))) {
			// Copy the literals
			memcpy(op, ip, 16);
			op += length;
			ip += length;

			// The second stage: prepare for match copying, decode full info.
			// If it doesn't work out, the info won't be wasted.
			length = token & ML_MASK; // match length
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// Do not deal with overlapping matches.
			if ((length != ML_MASK) && (offset >= 8) && (match >= dst)) {
				// Copy the match.
				memcpy(op +  0, match +  0, 8);
				memcpy(op +  8, match +  8, 8);
				memcpy(op + 16, match + 16, 2);
				op += length + MINMATCH;
				// Both stages worked, load the next token.
				continue;
			}

			// The second stage didn't work out, but the info is ready.
			// Propel it right to the point of match copying.
			goto _copy_match;
		}

		// decode literal length
		if (length == RUN_MASK) {
			length += read_variable_length(&ip);
		}

		// copy literals
		cpy = op + length;
safe_literal_copy:
		if ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS))) {
			// We've either hit the input parsing restriction or the output parsing restriction.
			// If we've hit the input parsing condition then this must be the last sequence.
			// If we've hit the output parsing condition then, when decoding a full block,
			// this must be the last sequence as well (otherwise the input is invalid).
			memmove(op, ip, length); // supports overlapping memory regions, which only matters for in-place decompression scenarios
			ip += length;
			op += length;
			break;
		} else {
			wildCopy8(op, ip, cpy); // may overwrite up to WILDCOPYLENGTH beyond cpy
			ip += length;
			op = cpy;
		}

		// get offset
		offset = Endian::read_UA_L16(ip);
		ip += 2;
		match = op - offset;

		// get matchlength
		length = token & ML_MASK;

_copy_match:
		if (length == ML_MASK) {
			length += read_variable_length(&ip);
		}
		length += MINMATCH;

safe_match_copy:
		// copy match within block
		cpy = op + length;

		if (offset < 8) [[unlikely]] {
			unalignedStore32(op, 0); // silence msan warning when offset == 0
			op[0] = match[0];
			op[1] = match[1];
			op[2] = match[2];
			op[3] = match[3];
			match += inc32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64table[offset];
		} else {
			memcpy(op, match, 8);
			match += 8;
		}
		op += 8;

		if (cpy > oend - MATCH_SAFEGUARD_DISTANCE) [[unlikely]] {
			uint8_t* const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
			if (op < oCopyLimit) {
				wildCopy8(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}
			while (op < cpy) {
				*op++ = *match++;
			}
		} else {
			memcpy(op, match, 8);
			if (length > 16) {
				wildCopy8(op + 8, match + 8, cpy);
			}
		}
		op = cpy; // wildcopy correction
	}

	return int(op - dst); // Nb of output bytes decoded
}
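
// Usage note (illustrative; the caller is assumed to have stored the
// uncompressed size alongside the compressed data). decompress() does not
// validate its input, so it must only be fed blocks produced by compress():
//
//	std::vector<uint8_t> plain(originalSize);
//	int n = LZ4::decompress(packed.data(), plain.data(), packedSize, originalSize);
//	assert(n == originalSize);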

} // namespace LZ4