lz4.cc
#include "lz4.hh"

#include "aligned.hh"
#include "endian.hh"
#include "inline.hh"
#include "unreachable.hh"
#include <array>
#include <bit>
#include <cstring>

#ifdef _MSC_VER
# include <intrin.h>
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
# pragma warning(disable : 4293) // disable: C4293: too large shift (32-bits)
#endif
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64))
# define LZ4_ARCH64 1
#else
# define LZ4_ARCH64 0
#endif

namespace LZ4 {

static constexpr int MEMORY_USAGE = 14;
static constexpr int HASHLOG = MEMORY_USAGE - 2;
static constexpr int ACCELERATION = 1;
static constexpr int MINMATCH = 4;
static constexpr int WILDCOPYLENGTH = 8;
static constexpr int LASTLITERALS = 5; // see ../doc/lz4_Block_format.md#parsing-restrictions
static constexpr int MFLIMIT = 12; // see ../doc/lz4_Block_format.md#parsing-restrictions
static constexpr int MATCH_SAFEGUARD_DISTANCE = 2 * WILDCOPYLENGTH - MINMATCH; // ensure it's possible to write 2 x wildcopyLength without overflowing output buffer
static constexpr int FASTLOOP_SAFE_DISTANCE = 64;
static constexpr int MIN_LENGTH = MFLIMIT + 1;
static constexpr int DISTANCE_MAX = 65535;
static constexpr int ML_BITS = 4;
static constexpr int ML_MASK = (1 << ML_BITS) - 1;
static constexpr int RUN_BITS = 8 - ML_BITS;
static constexpr int RUN_MASK = (1 << RUN_BITS) - 1;
static constexpr int LIMIT_64K = 0x10000 + (MFLIMIT - 1);
static constexpr uint32_t SKIP_TRIGGER = 6; // increasing this value makes compression run slower on incompressible data
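
// Worked example of the sizing above: MEMORY_USAGE = 14 targets a 2^14 = 16 kB
// hash table. With HASHLOG = 12 that holds for all three flavors below:
//   byU16 : 1 << (HASHLOG + 1) = 8192 entries * 2 bytes = 16 kB
//   byU32 : 1 << HASHLOG       = 4096 entries * 4 bytes = 16 kB
//   byPtr : 1 << HASHLOG       = 4096 entries * 4 bytes = 16 kB (only used with 32-bit pointers)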

using reg_t = uintptr_t;
static constexpr int STEPSIZE = sizeof(reg_t);


// Read one register-wide value (4 or 8 bytes) from a possibly unaligned address.
[[nodiscard]] static reg_t read_ARCH(const uint8_t* p)
{
	reg_t val;
	memcpy(&val, p, sizeof(val));
	return val;
}

// customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd
static void wildCopy8(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst, src, 8);
		dst += 8;
		src += 8;
	} while (dst < dstEnd);
}
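
// Example: wildCopy8(dst, src, dst + 13) performs two 8-byte copies, writing
// 16 bytes in total, i.e. 3 bytes past dstEnd. The caller must therefore
// always reserve slack space after dstEnd, as the comment above states.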

// customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
// this version copies two times 16 bytes (instead of one time 32 bytes)
// because it must be compatible with offsets >= 16.
static void wildCopy32(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst + 0, src + 0, 16);
		memcpy(dst + 16, src + 16, 16);
		dst += 32;
		src += 32;
	} while (dst < dstEnd);
}
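
// Why two 16-byte copies: when replicating a match, src may trail dst by as
// little as 16 bytes. Two sequential 16-byte copies then correctly re-read the
// bytes the first copy just produced, whereas a single 32-byte copy would only
// be safe for offsets >= 32.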

static constexpr std::array<unsigned, 8> inc32table = {0, 1, 2, 1, 0, 4, 4, 4};
static constexpr std::array<int     , 8> dec64table = {0, 0, 0, -1, -4, 1, 2, 3};
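
// These tables handle overlapping copies with offset < 8: inc32table[offset]
// nudges src before the second 4-byte copy so it reads the right bytes of the
// repeating pattern, and dec64table[offset] then rewinds src so that dst - src
// becomes a multiple of the offset that is >= 8. E.g. for offset == 3 the
// distance grows from 3 to 9, so the wildCopy8 continuation no longer overlaps.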

static void memcpy_using_offset_base(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, const size_t offset)
{
	if (offset < 8) {
		dstPtr[0] = srcPtr[0];
		dstPtr[1] = srcPtr[1];
		dstPtr[2] = srcPtr[2];
		dstPtr[3] = srcPtr[3];
		srcPtr += inc32table[offset];
		memcpy(dstPtr + 4, srcPtr, 4);
		srcPtr -= dec64table[offset];
		dstPtr += 8;
	} else {
		memcpy(dstPtr, srcPtr, 8);
		dstPtr += 8;
		srcPtr += 8;
	}

	wildCopy8(dstPtr, srcPtr, dstEnd);
}

// memcpy_using_offset() presumes :
// - dstEnd >= dstPtr + MINMATCH
// - there are at least 8 bytes available to write after dstEnd
static void memcpy_using_offset(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, size_t offset)
{
	std::array<uint8_t, 8> v;

	unalignedStore32(dstPtr, 0); // silence an msan warning when offset==0

	switch (offset) {
	case 1:
		memset(v.data(), *srcPtr, 8);
		break;
	case 2:
		memcpy(&v[0], srcPtr, 2);
		memcpy(&v[2], srcPtr, 2);
		memcpy(&v[4], &v[0], 4);
		break;
	case 4:
		memcpy(&v[0], srcPtr, 4);
		memcpy(&v[4], srcPtr, 4);
		break;
	default:
		memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
		return;
	}

	memcpy(dstPtr, v.data(), 8);
	dstPtr += 8;
	while (dstPtr < dstEnd) {
		memcpy(dstPtr, v.data(), 8);
		dstPtr += 8;
	}
}
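
// For the special-cased offsets the pattern is materialized once in v and then
// stamped out in 8-byte blocks: offset 2 with source bytes "ab" builds
// v = "abababab", and each memcpy of v extends the output by whole periods.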

[[nodiscard]] static inline int NbCommonBytes(size_t val)
{
	if (val == 0) UNREACHABLE;
	if constexpr (Endian::BIG) {
		return std::countl_zero(val) >> 3;
	} else {
		return std::countr_zero(val) >> 3;
	}
}
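
// Example, little endian: if two words first differ in their third byte, the
// XOR passed in as 'val' has its lowest set bit somewhere in bits 16..23;
// countr_zero() then returns 16..23 and the >> 3 maps all of those to
// 2 common bytes.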

// Count how many bytes match between pIn and pMatch, scanning word-at-a-time,
// without reading at or beyond pInLimit.
[[nodiscard]] ALWAYS_INLINE unsigned count(const uint8_t* pIn, const uint8_t* pMatch, const uint8_t* pInLimit)
{
	const uint8_t* const pStart = pIn;

	if (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
		} else {
			return NbCommonBytes(diff);
		}
	}
	while (pIn < pInLimit - (STEPSIZE - 1)) [[likely]] {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
			continue;
		}
		pIn += NbCommonBytes(diff);
		return unsigned(pIn - pStart);
	}

	if ((STEPSIZE == 8) && (pIn < (pInLimit - 3)) && (unalignedLoad32(pMatch) == unalignedLoad32(pIn))) {
		pIn += 4;
		pMatch += 4;
	}
	if ((pIn < (pInLimit - 1)) && (unalignedLoad16(pMatch) == unalignedLoad16(pIn))) {
		pIn += 2;
		pMatch += 2;
	}
	if ((pIn < pInLimit) && (*pMatch == *pIn)) {
		pIn += 1;
	}
	return unsigned(pIn - pStart);
}


template<bool L64K, bool ARCH64> struct HashImpl;

// byU16
template<bool ARCH64> struct HashImpl<true, ARCH64> {
	alignas(uint64_t) std::array<uint16_t, 1 << (HASHLOG + 1)> tab = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - (HASHLOG + 1));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = uint16_t(idx);
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint16_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};
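
// 2654435761U is the classic Knuth multiplicative-hash constant (roughly
// 2^32 / golden ratio); the multiply mixes the 4 loaded bytes and the shift
// keeps only the top HASHLOG + 1 (here 13) bits of the product as table index.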

// byU32
template<> struct HashImpl<false, true> {
	alignas(uint64_t) std::array<uint32_t, 1 << HASHLOG> tab = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint64_t sequence = read_ARCH(p);
		const uint64_t prime = Endian::BIG
			? 11400714785074694791ULL // 8 bytes
			: 889523592379ULL; // 5 bytes
		return uint32_t(((sequence << 24) * prime) >> (64 - HASHLOG));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = idx;
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint32_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byPtr
template<> struct HashImpl<false, false> {
	alignas(uint64_t) std::array<const uint8_t*, 1 << HASHLOG> tab = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - HASHLOG);
	}
	void putIndexOnHash(uint32_t /*idx*/, uint32_t /*h*/) {
		UNREACHABLE; // the index-based interface is never used with byPtr
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* /*srcBase*/) {
		tab[h] = p;
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t /*h*/) const {
		UNREACHABLE; // the index-based interface is never used with byPtr
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* /*srcBase*/) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// Greedy LZ4 block compressor; the two template flags pick one of the three
// hash-table flavors above (byU16 for small inputs, byU32 on 64-bit hosts,
// byPtr on 32-bit hosts).
template<bool L64K, bool ARCH64>
ALWAYS_INLINE int compress_impl(const uint8_t* src, uint8_t* const dst, const int inputSize)
{
	HashImpl<L64K, ARCH64> hashTable;

	const uint8_t* ip = src;
	uint8_t* op = dst;

	const uint8_t* anchor = src;
	const uint8_t* const iend = ip + inputSize;
	const uint8_t* const mflimitPlusOne = iend - MFLIMIT + 1;
	const uint8_t* const matchlimit = iend - LASTLITERALS;

	uint32_t forwardH;

	if (inputSize < MIN_LENGTH) goto _last_literals; // Input too small, no compression (all literals)

	// First byte
	hashTable.putPosition(ip, src);
	ip++;
	forwardH = hashTable.hashPosition(ip);

	while (true) {
		// Find a match
		const uint8_t* match;
		if constexpr (!L64K && !ARCH64) { // byPtr
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			do {
				uint32_t h = forwardH;
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (forwardIp > mflimitPlusOne) [[unlikely]] goto _last_literals;

				match = hashTable.getPositionOnHash(h, src);
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putPositionOnHash(ip, h, src);
			} while ((match + DISTANCE_MAX < ip) ||
			         (unalignedLoad32(match) != unalignedLoad32(ip)));

		} else { // byU16 or byU32
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			while (true) {
				auto h = forwardH;
				auto current = uint32_t(forwardIp - src);
				auto matchIndex = hashTable.getIndexOnHash(h);
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (forwardIp > mflimitPlusOne) [[unlikely]] goto _last_literals;

				match = src + matchIndex;
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putIndexOnHash(current, h);

				if (!L64K && (matchIndex + DISTANCE_MAX < current)) {
					continue; // too far
				}

				if (unalignedLoad32(match) == unalignedLoad32(ip)) {
					break; // match found
				}
			}
		}

		// Catch up
		while (((ip > anchor) & (match > src)) && (/*unlikely*/(ip[-1] == match[-1]))) {
			ip--;
			match--;
		}

		// Encode Literals
		auto litLength = unsigned(ip - anchor);
		uint8_t* token = op++;
		if (litLength >= RUN_MASK) {
			auto len = int(litLength - RUN_MASK);
			*token = RUN_MASK << ML_BITS;
			while (len >= 255) {
				*op++ = 255;
				len -= 255;
			}
			*op++ = uint8_t(len);
		} else {
			*token = uint8_t(litLength << ML_BITS);
		}

		// Copy Literals
		wildCopy8(op, anchor, op + litLength);
		op += litLength;

_next_match:
		// At this stage, the following variables must be correctly set:
		// - ip : at start of LZ operation
		// - match : at start of previous pattern occurrence; always within the
		//   current block (this implementation has no external dictionary)
		// - token and *token : position to write the 4-bit match length; the
		//   upper 4 bits (literal length) are assumed to be already written

		// Encode Offset
		Endian::write_UA_L16(op, uint16_t(ip - match));
		op += 2;

		// Encode MatchLength
		unsigned matchCode = count(ip + MINMATCH, match + MINMATCH, matchlimit);
		ip += size_t(matchCode) + MINMATCH;

		if (matchCode >= ML_MASK) {
			*token += ML_MASK;
			matchCode -= ML_MASK;
			unalignedStore32(op, 0xFFFFFFFF);
			while (matchCode >= 4 * 255) {
				op += 4;
				unalignedStore32(op, 0xFFFFFFFF);
				matchCode -= 4 * 255;
			}
			op += matchCode / 255;
			*op++ = uint8_t(matchCode % 255);
		} else {
			*token += uint8_t(matchCode);
		}

		anchor = ip;

		// Test end of chunk
		if (ip >= mflimitPlusOne) break;

		// Fill table
		hashTable.putPosition(ip - 2, src);

		// Test next position
		if constexpr (!L64K && !ARCH64) { // byPtr
			match = hashTable.getPosition(ip, src);
			hashTable.putPosition(ip, src);
			if ((match + DISTANCE_MAX >= ip) && (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		} else { // byU16 or byU32
			auto h = hashTable.hashPosition(ip);
			auto current = uint32_t(ip - src);
			auto matchIndex = hashTable.getIndexOnHash(h);
			match = src + matchIndex;
			hashTable.putIndexOnHash(current, h);
			if ((L64K || (matchIndex + DISTANCE_MAX >= current)) &&
			    (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		}

		// Prepare next loop
		forwardH = hashTable.hashPosition(++ip);
	}

_last_literals:
	// Encode Last Literals
	auto lastRun = size_t(iend - anchor);
	if (lastRun >= RUN_MASK) {
		size_t accumulator = lastRun - RUN_MASK;
		*op++ = RUN_MASK << ML_BITS;
		while (accumulator >= 255) {
			*op++ = 255;
			accumulator -= 255;
		}
		*op++ = uint8_t(accumulator);
	} else {
		*op++ = uint8_t(lastRun << ML_BITS);
	}
	memcpy(op, anchor, lastRun);
	ip = anchor + lastRun;
	op += lastRun;

	return int(op - dst);
}

// Compress 'srcSize' bytes from 'src' into 'dst'; returns the number of
// compressed bytes written. Small inputs take the 16-bit-index fast path.
int compress(const uint8_t* src, uint8_t* dst, int srcSize)
{
	if (srcSize < LIMIT_64K) {
		return compress_impl<true, LZ4_ARCH64>(src, dst, srcSize);
	} else {
		return compress_impl<false, LZ4_ARCH64>(src, dst, srcSize);
	}
}
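
// Usage sketch (illustrative, not part of this file): the caller must size the
// destination for the worst case. This file exposes no bound helper, but the
// classic LZ4 block bound srcSize + srcSize/255 + 16 is a safe assumption:
//
//     std::vector<uint8_t> buf(size + size / 255 + 16);
//     int compressedSize = LZ4::compress(data, buf.data(), int(size));
//     buf.resize(compressedSize);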



// Read a variable-length integer: the sum of successive bytes, terminated by
// the first byte that is not 255 (see the worked example below).
static ALWAYS_INLINE unsigned read_variable_length(const uint8_t** ip)
{
	unsigned length = 0;
	unsigned s;
	do {
		s = **ip;
		(*ip)++;
		length += s;
	} while (s == 255);

	return length;
}
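
// Example: a literal run of 300 bytes stores RUN_MASK (15) in the token,
// followed by the bytes 255, 30; read_variable_length() returns 255 + 30 = 285,
// and 15 + 285 = 300.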

// Decompress one complete LZ4 block of 'compressedSize' bytes into 'dst'
// (capacity 'dstCapacity'); returns the number of bytes written.
int decompress(const uint8_t* src, uint8_t* dst, int compressedSize, int dstCapacity)
{
	const uint8_t* ip = src;
	const uint8_t* const iend = ip + compressedSize;

	uint8_t* op = dst;
	uint8_t* const oend = op + dstCapacity;
	uint8_t* cpy;

	// Set up the "end" pointers for the shortcut.
	const uint8_t* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
	const uint8_t* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
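
	// The shortcut margins follow from the token layout: a literal length that
	// avoids the RUN_MASK escape is at most RUN_MASK - 1 = 14 bytes, and a
	// match length that avoids the ML_MASK escape is at most
	// (ML_MASK - 1) + MINMATCH = 18 bytes.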

	const uint8_t* match;
	size_t offset;
	unsigned token;
	size_t length;

	if ((oend - op) >= FASTLOOP_SAFE_DISTANCE) {
		// Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE
		while (true) {
			// Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE
			token = *ip++;
			length = token >> ML_BITS; // literal length

			// decode literal length
			if (length == RUN_MASK) {
				length += read_variable_length(&ip);

				// copy literals
				cpy = op + length;
				if ((cpy > oend - 32) || (ip + length > iend - 32)) {
					goto safe_literal_copy;
				}
				wildCopy32(op, ip, cpy);
				ip += length;
				op = cpy;
			} else {
				cpy = op + length;
				// We don't need to check oend, since we check it once for each loop below
				if (ip > iend - (16 + 1/*max lit + offset + nextToken*/)) {
					goto safe_literal_copy;
				}
				// Literals can only be <= 14, but hope compilers optimize if we copy by a register size
				memcpy(op, ip, 16);
				ip += length;
				op = cpy;
			}

			// get offset
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// get match-length
			length = token & ML_MASK;

			if (length == ML_MASK) {
				length += read_variable_length(&ip);
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}
			} else {
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}

				// Fast path check: Avoids a branch in wildCopy32 if true
				if ((match >= dst) && (offset >= 8)) {
					memcpy(op + 0, match + 0, 8);
					memcpy(op + 8, match + 8, 8);
					memcpy(op + 16, match + 16, 2);
					op += length;
					continue;
				}
			}

			// copy match within block
			cpy = op + length;

			if (offset < 16) [[unlikely]] {
				memcpy_using_offset(op, match, cpy, offset);
			} else {
				wildCopy32(op, match, cpy);
			}

			op = cpy; // wildcopy correction
		}
	}

	// Main Loop : decode the remaining sequences, once fewer than
	// FASTLOOP_SAFE_DISTANCE output bytes remain
	while (true) {
		token = *ip++;
		length = token >> ML_BITS; // literal length

		// A two-stage shortcut for the most common case:
		// 1) If the literal length is 0..14, and there is enough space,
		// enter the shortcut and copy 16 bytes on behalf of the literals
		// (in the fast mode, only 8 bytes can be safely copied this way).
		// 2) Further, if the match length is 4..18, copy 18 bytes in a similar
		// manner; but we ensure that there's enough space in the output for
		// those 18 bytes earlier, upon entering the shortcut (in other words,
		// there is a combined check for both stages).
		if ((length != RUN_MASK) &&
		    // strictly "less than" on input, to re-enter the loop with at least one byte
		    /*likely*/((ip < shortiend) & (op <= shortoend))) {
			// Copy the literals
			memcpy(op, ip, 16);
			op += length;
			ip += length;

			// The second stage: prepare for match copying, decode full info.
			// If it doesn't work out, the info won't be wasted.
			length = token & ML_MASK; // match length
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// Do not deal with overlapping matches.
			if ((length != ML_MASK) && (offset >= 8) && (match >= dst)) {
				// Copy the match.
				memcpy(op + 0, match + 0, 8);
				memcpy(op + 8, match + 8, 8);
				memcpy(op + 16, match + 16, 2);
				op += length + MINMATCH;
				// Both stages worked, load the next token.
				continue;
			}

			// The second stage didn't work out, but the info is ready.
			// Propel it right to the point of match copying.
			goto _copy_match;
		}

		// decode literal length
		if (length == RUN_MASK) {
			length += read_variable_length(&ip);
		}

		// copy literals
		cpy = op + length;
safe_literal_copy:
		if ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS))) {
			// We've either hit the input parsing restriction or the output
			// parsing restriction. Either way this must be the last sequence
			// (this implementation does no partial decoding), so copy the
			// remaining literals and finish the block.
			memmove(op, ip, length); // supports overlapping memory regions, which only matters for in-place decompression scenarios
			ip += length;
			op += length;
			break;
		} else {
			wildCopy8(op, ip, cpy); // may overwrite up to WILDCOPYLENGTH beyond cpy
			ip += length;
			op = cpy;
		}

		// get offset
		offset = Endian::read_UA_L16(ip);
		ip += 2;
		match = op - offset;

		// get matchlength
		length = token & ML_MASK;

_copy_match:
		if (length == ML_MASK) {
			length += read_variable_length(&ip);
		}
		length += MINMATCH;

safe_match_copy:
		// copy match within block
		cpy = op + length;

		if (offset < 8) [[unlikely]] {
			unalignedStore32(op, 0); // silence msan warning when offset == 0
			op[0] = match[0];
			op[1] = match[1];
			op[2] = match[2];
			op[3] = match[3];
			match += inc32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64table[offset];
		} else {
			memcpy(op, match, 8);
			match += 8;
		}
		op += 8;

		if (cpy > oend - MATCH_SAFEGUARD_DISTANCE) [[unlikely]] {
			uint8_t* const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
			if (op < oCopyLimit) {
				wildCopy8(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}
			while (op < cpy) {
				*op++ = *match++;
			}
		} else {
			memcpy(op, match, 8);
			if (length > 16) {
				wildCopy8(op + 8, match + 8, cpy);
			}
		}
		op = cpy; // wildcopy correction
	}

	return int(op - dst); // Nb of output bytes decoded
}
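
// Usage sketch (illustrative, not part of this file): decompress() needs the
// exact compressed size and a destination sized for the decompressed data, so
// callers typically store both sizes alongside the compressed block:
//
//     std::vector<uint8_t> out(originalSize);
//     int n = LZ4::decompress(comp.data(), out.data(), compressedSize, int(out.size()));
//     assert(n == int(originalSize));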

} // namespace LZ4