openMSX
lz4.cc
#include "lz4.hh"

#include "aligned.hh"
#include "endian.hh"
#include "inline.hh"
#include "likely.hh"
#include "unreachable.hh"
#include <bit>
#include <cstring>

#ifdef _MSC_VER
# include <intrin.h>
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
# pragma warning(disable : 4293) // disable: C4293: too large shift (32-bits)
#endif

// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64))
# define LZ4_ARCH64 1
#else
# define LZ4_ARCH64 0
#endif

namespace LZ4 {

static constexpr int MEMORY_USAGE = 14;
static constexpr int HASHLOG = MEMORY_USAGE - 2;
static constexpr int HASH_SIZE_U32 = 1 << HASHLOG;
static constexpr int ACCELERATION = 1;
static constexpr int MINMATCH = 4;
static constexpr int WILDCOPYLENGTH = 8;
static constexpr int LASTLITERALS = 5; // see ../doc/lz4_Block_format.md#parsing-restrictions
static constexpr int MFLIMIT = 12; // see ../doc/lz4_Block_format.md#parsing-restrictions
static constexpr int MATCH_SAFEGUARD_DISTANCE = 2 * WILDCOPYLENGTH - MINMATCH; // ensure it's possible to write 2 x wildcopyLength without overflowing output buffer
static constexpr int FASTLOOP_SAFE_DISTANCE = 64;
static constexpr int MIN_LENGTH = MFLIMIT + 1;
static constexpr int DISTANCE_MAX = 65535;
static constexpr int ML_BITS = 4;
static constexpr int ML_MASK = (1 << ML_BITS) - 1;
static constexpr int RUN_BITS = 8 - ML_BITS;
static constexpr int RUN_MASK = (1 << RUN_BITS) - 1;
static constexpr int LIMIT_64K = 0x10000 + (MFLIMIT - 1);
static constexpr uint32_t SKIP_TRIGGER = 6; // Increase this value ==> compression runs slower on incompressible data
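// With MEMORY_USAGE = 14 (so HASHLOG = 12) each hash table below works out to:
//   byU16 : (1 << 13) entries * 2 bytes  = 16 KiB
//   byU32 : (1 << 12) entries * 4 bytes  = 16 KiB
//   byPtr : (1 << 12) pointers * 4 bytes = 16 KiB (byPtr is only used on 32-bit builds)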

using reg_t = uintptr_t;
static constexpr int STEPSIZE = sizeof(reg_t);


[[nodiscard]] static reg_t read_ARCH(const uint8_t* p)
{
	reg_t val;
	memcpy(&val, p, sizeof(val));
	return val;
}

// customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd
static void wildCopy8(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst, src, 8);
		dst += 8;
		src += 8;
	} while (dst < dstEnd);
}

// customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
// this version copies two times 16 bytes (instead of one time 32 bytes)
// because it must be compatible with offsets >= 16.
static void wildCopy32(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst + 0, src + 0, 16);
		memcpy(dst + 16, src + 16, 16);
		dst += 32;
		src += 32;
	} while (dst < dstEnd);
}

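// inc32table/dec64table adjust the source pointer when replicating an
// overlapping match with a small offset (1..7): after the first 4 bytes are
// copied one by one, the tables shift the source so the following 4-byte and
// 8-byte copies continue the repeating pattern correctly.
// Example: offset == 3 (pattern "abc") writes a b c a byte by byte, advances
// the source by inc32table[3] == 1, and the next 4-byte copy yields b c a b.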
static constexpr unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static constexpr int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};

static void memcpy_using_offset_base(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, const size_t offset)
{
	if (offset < 8) {
		dstPtr[0] = srcPtr[0];
		dstPtr[1] = srcPtr[1];
		dstPtr[2] = srcPtr[2];
		dstPtr[3] = srcPtr[3];
		srcPtr += inc32table[offset];
		memcpy(dstPtr + 4, srcPtr, 4);
		srcPtr -= dec64table[offset];
		dstPtr += 8;
	} else {
		memcpy(dstPtr, srcPtr, 8);
		dstPtr += 8;
		srcPtr += 8;
	}

	wildCopy8(dstPtr, srcPtr, dstEnd);
}

// memcpy_using_offset() presumes :
// - dstEnd >= dstPtr + MINMATCH
// - there are at least 8 bytes available to write after dstEnd
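// For offsets 1, 2 and 4 an 8-byte repeating pattern is prebuilt in 'v' and
// then stamped out in 8-byte chunks (e.g. offset == 2 with source bytes "xy"
// gives v = x y x y x y x y); other offsets fall back to memcpy_using_offset_base().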
static void memcpy_using_offset(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, size_t offset)
{
	uint8_t v[8];

	unalignedStore32(dstPtr, 0); // silence an msan warning when offset==0

	switch (offset) {
	case 1:
		memset(v, *srcPtr, 8);
		break;
	case 2:
		memcpy(v, srcPtr, 2);
		memcpy(&v[2], srcPtr, 2);
		memcpy(&v[4], &v[0], 4);
		break;
	case 4:
		memcpy(v, srcPtr, 4);
		memcpy(&v[4], srcPtr, 4);
		break;
	default:
		memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
		return;
	}

	memcpy(dstPtr, v, 8);
	dstPtr += 8;
	while (dstPtr < dstEnd) {
		memcpy(dstPtr, v, 8);
		dstPtr += 8;
	}
}

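// Number of identical leading bytes, given 'val' = XOR of two loaded words.
// On little-endian the first differing byte occupies the lowest set bits, so
// countr_zero(val) / 8 gives the count; on big-endian it is countl_zero(val) / 8.
// Example (little-endian): if the words first differ in byte 2, the lowest set
// bit of the XOR lies in bits 16..23, so countr_zero() >> 3 == 2.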
[[nodiscard]] static inline int NbCommonBytes(size_t val)
{
	if (val == 0) UNREACHABLE;
	if constexpr (Endian::BIG) {
		return std::countl_zero(val) >> 3;
	} else {
		return std::countr_zero(val) >> 3;
	}
}

[[nodiscard]] ALWAYS_INLINE unsigned count(const uint8_t* pIn, const uint8_t* pMatch, const uint8_t* pInLimit)
{
	const uint8_t* const pStart = pIn;

	if (likely(pIn < pInLimit - (STEPSIZE - 1))) {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
		} else {
			return NbCommonBytes(diff);
		}
	}
	while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
			continue;
		}
		pIn += NbCommonBytes(diff);
		return unsigned(pIn - pStart);
	}

	if ((STEPSIZE == 8) && (pIn < (pInLimit - 3)) && (unalignedLoad32(pMatch) == unalignedLoad32(pIn))) {
		pIn += 4;
		pMatch += 4;
	}
	if ((pIn < (pInLimit - 1)) && (unalignedLoad16(pMatch) == unalignedLoad16(pIn))) {
		pIn += 2;
		pMatch += 2;
	}
	if ((pIn < pInLimit) && (*pMatch == *pIn)) {
		pIn += 1;
	}
	return unsigned(pIn - pStart);
}


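// Three hash-table layouts, selected at compile time:
// - byU16 (L64K)            : inputs < 64 KiB, 16-bit positions, twice as many entries
// - byU32 (!L64K && ARCH64) : 32-bit indices relative to 'src'
// - byPtr (!L64K && !ARCH64): raw pointers, used on 32-bit builds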
template<bool L64K, bool ARCH64> struct HashImpl;

// byU16
template<bool ARCH64> struct HashImpl<true, ARCH64> {
	alignas(uint64_t) uint16_t tab[1 << (HASHLOG + 1)] = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - (HASHLOG + 1));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = uint16_t(idx);
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint16_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byU32
template<> struct HashImpl<false, true> {
	alignas(uint64_t) uint32_t tab[1 << HASHLOG] = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint64_t sequence = read_ARCH(p);
		const uint64_t prime = Endian::BIG
			? 11400714785074694791ULL // 8 bytes
			: 889523592379ULL; // 5 bytes
		return uint32_t(((sequence << 24) * prime) >> (64 - HASHLOG));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = idx;
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint32_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byPtr
template<> struct HashImpl<false, false> {
	alignas(uint64_t) const uint8_t* tab[1 << HASHLOG] = {};

	[[nodiscard]] static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - HASHLOG);
	}
	void putIndexOnHash(uint32_t /*idx*/, uint32_t /*h*/) {
		UNREACHABLE;
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* /*srcBase*/) {
		tab[h] = p;
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	[[nodiscard]] uint32_t getIndexOnHash(uint32_t /*h*/) const {
		UNREACHABLE; return 0;
	}
	[[nodiscard]] const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* /*srcBase*/) const {
		return tab[h];
	}
	[[nodiscard]] const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

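// Greedy single-pass LZ4 block compression, following the structure of
// upstream LZ4's LZ4_compress_generic but simplified: no dictionary, fixed
// ACCELERATION == 1, and no output-size limit (so 'dst' must be sized for the
// worst case). Matches of at least MINMATCH bytes are found via the hash
// table and emitted as (literal run, offset, match length) sequences.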
template<bool L64K, bool ARCH64>
ALWAYS_INLINE int compress_impl(const uint8_t* src, uint8_t* const dst, const int inputSize)
{
	HashImpl<L64K, ARCH64> hashTable;

	const uint8_t* ip = src;
	uint8_t* op = dst;

	const uint8_t* anchor = src;
	const uint8_t* const iend = ip + inputSize;
	const uint8_t* const mflimitPlusOne = iend - MFLIMIT + 1;
	const uint8_t* const matchlimit = iend - LASTLITERALS;

	uint32_t forwardH;

	if (inputSize < MIN_LENGTH) goto _last_literals; // Input too small, no compression (all literals)

	// First byte
	hashTable.putPosition(ip, src);
	ip++;
	forwardH = hashTable.hashPosition(ip);

	while (true) {
		// Find a match
		const uint8_t* match;
		if constexpr (!L64K && !ARCH64) { // byPtr
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			do {
				uint32_t h = forwardH;
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;

				match = hashTable.getPositionOnHash(h, src);
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putPositionOnHash(ip, h, src);
			} while ((match + DISTANCE_MAX < ip) ||
			         (unalignedLoad32(match) != unalignedLoad32(ip)));

		} else { // byU16 or byU32
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			while (true) {
				auto h = forwardH;
				auto current = uint32_t(forwardIp - src);
				auto matchIndex = hashTable.getIndexOnHash(h);
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;

				match = src + matchIndex;
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putIndexOnHash(current, h);

				if (!L64K && (matchIndex + DISTANCE_MAX < current)) {
					continue; // too far
				}

				if (unalignedLoad32(match) == unalignedLoad32(ip)) {
					break; // match found
				}
			}
		}

		// Catch up
		while (((ip > anchor) & (match > src)) && (unlikely(ip[-1] == match[-1]))) {
			ip--;
			match--;
		}

		// Encode Literals
		auto litLength = unsigned(ip - anchor);
		uint8_t* token = op++;
		if (litLength >= RUN_MASK) {
			int len = int(litLength - RUN_MASK);
			*token = RUN_MASK << ML_BITS;
			while (len >= 255) {
				*op++ = 255;
				len -= 255;
			}
			*op++ = uint8_t(len);
		} else {
			*token = uint8_t(litLength << ML_BITS);
		}

		// Copy Literals
		wildCopy8(op, anchor, op + litLength);
		op += litLength;

_next_match:
		// At this stage, the following variables must be correctly set:
		// - ip : at start of LZ operation
		// - match : at start of previous pattern occurrence (within the current prefix)
		// - token and *token : position to write the 4-bit match length; the upper
		//   4 bits (literal length) are assumed to be already written

		// Encode Offset
		Endian::write_UA_L16(op, uint16_t(ip - match));
		op += 2;

		// Encode MatchLength
		unsigned matchCode = count(ip + MINMATCH, match + MINMATCH, matchlimit);
		ip += size_t(matchCode) + MINMATCH;

		if (matchCode >= ML_MASK) {
			*token += ML_MASK;
			matchCode -= ML_MASK;
			unalignedStore32(op, 0xFFFFFFFF);
			while (matchCode >= 4 * 255) {
				op += 4;
				unalignedStore32(op, 0xFFFFFFFF);
				matchCode -= 4 * 255;
			}
			op += matchCode / 255;
			*op++ = uint8_t(matchCode % 255);
		} else {
			*token += uint8_t(matchCode);
		}

		anchor = ip;

		// Test end of chunk
		if (ip >= mflimitPlusOne) break;

		// Fill table
		hashTable.putPosition(ip - 2, src);

		// Test next position
		if constexpr (!L64K && !ARCH64) { // byPtr
			match = hashTable.getPosition(ip, src);
			hashTable.putPosition(ip, src);
			if ((match + DISTANCE_MAX >= ip) && (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		} else { // byU16 or byU32
			auto h = hashTable.hashPosition(ip);
			auto current = uint32_t(ip - src);
			auto matchIndex = hashTable.getIndexOnHash(h);
			match = src + matchIndex;
			hashTable.putIndexOnHash(current, h);
			if ((L64K || (matchIndex + DISTANCE_MAX >= current)) &&
			    (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		}

		// Prepare next loop
		forwardH = hashTable.hashPosition(++ip);
	}

_last_literals:
	// Encode Last Literals
	auto lastRun = size_t(iend - anchor);
	if (lastRun >= RUN_MASK) {
		size_t accumulator = lastRun - RUN_MASK;
		*op++ = RUN_MASK << ML_BITS;
		while (accumulator >= 255) {
			*op++ = 255;
			accumulator -= 255;
		}
		*op++ = uint8_t(accumulator);
	} else {
		*op++ = uint8_t(lastRun << ML_BITS);
	}
	memcpy(op, anchor, lastRun);
	ip = anchor + lastRun;
	op += lastRun;

	return int(op - dst);
}

int compress(const uint8_t* src, uint8_t* dst, int srcSize)
{
	if (srcSize < LIMIT_64K) {
		return compress_impl<true, LZ4_ARCH64>(src, dst, srcSize);
	} else {
		return compress_impl<false, LZ4_ARCH64>(src, dst, srcSize);
	}
}
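// Usage sketch (hypothetical caller, not part of this file): compress() takes
// no output limit, so the destination buffer must be sized for the worst case.
// Upstream LZ4 bounds the compressed size by srcSize + srcSize/255 + 16.
//
//   std::vector<uint8_t> out(srcSize + srcSize / 255 + 16);
//   int compressedSize = LZ4::compress(srcData, out.data(), srcSize);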



static ALWAYS_INLINE unsigned read_variable_length(const uint8_t** ip)
{
	unsigned length = 0;
	unsigned s;
	do {
		s = **ip;
		(*ip)++;
		length += s;
	} while (s == 255);

	return length;
}

int decompress(const uint8_t* src, uint8_t* dst, int compressedSize, int dstCapacity)
{
	const uint8_t* ip = src;
	const uint8_t* const iend = ip + compressedSize;

	uint8_t* op = dst;
	uint8_t* const oend = op + dstCapacity;
	uint8_t* cpy;

	// Set up the "end" pointers for the shortcut.
	const uint8_t* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
	const uint8_t* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;

	const uint8_t* match;
	size_t offset;
	unsigned token;
	size_t length;

	if ((oend - op) >= FASTLOOP_SAFE_DISTANCE) {
		// Fast loop : decode sequences as long as output < oend - FASTLOOP_SAFE_DISTANCE
		while (true) {
			// Main fastloop assertion: we can always wildcopy FASTLOOP_SAFE_DISTANCE
			token = *ip++;
			length = token >> ML_BITS; // literal length

			// decode literal length
			if (length == RUN_MASK) {
				length += read_variable_length(&ip);

				// copy literals
				cpy = op + length;
				if ((cpy > oend - 32) || (ip + length > iend - 32)) {
					goto safe_literal_copy;
				}
				wildCopy32(op, ip, cpy);
				ip += length;
				op = cpy;
			} else {
				cpy = op + length;
				// We don't need to check oend, since we check it once for each loop below
				if (ip > iend - (16 + 1/*max lit + offset + nextToken*/)) {
					goto safe_literal_copy;
				}
				// Literals can only be <= 14 here, but copying a fixed 16 bytes lets the compiler use register-sized copies
				memcpy(op, ip, 16);
				ip += length;
				op = cpy;
			}

			// get offset
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// get matchlength
			length = token & ML_MASK;

			if (length == ML_MASK) {
				length += read_variable_length(&ip);
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}
			} else {
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}

				// Fastpath check: avoids a branch in wildCopy32 if true
				if (match >= dst) {
					if (offset >= 8) {
						memcpy(op + 0, match + 0, 8);
						memcpy(op + 8, match + 8, 8);
						memcpy(op + 16, match + 16, 2);
						op += length;
						continue;
					}
				}
			}

			// copy match within block
			cpy = op + length;

			if (unlikely(offset < 16)) {
				memcpy_using_offset(op, match, cpy, offset);
			} else {
				wildCopy32(op, match, cpy);
			}

			op = cpy; // wildcopy correction
		}
	}

	// Main loop : decode the remaining sequences, where fewer than FASTLOOP_SAFE_DISTANCE output bytes may remain
	while (true) {
		token = *ip++;
		length = token >> ML_BITS; // literal length

		// A two-stage shortcut for the most common case:
		// 1) If the literal length is 0..14, and there is enough space,
		//    enter the shortcut and copy 16 bytes on behalf of the literals
		//    (in the fast mode, only 8 bytes can be safely copied this way).
		// 2) Further if the match length is 4..18, copy 18 bytes in a similar
		//    manner; but we ensure that there's enough space in the output for
		//    those 18 bytes earlier, upon entering the shortcut (in other words,
		//    there is a combined check for both stages).
		if ((length != RUN_MASK) &&
		    // strictly "less than" on input, to re-enter the loop with at least one byte
		    likely((ip < shortiend) & (op <= shortoend))) {
			// Copy the literals
			memcpy(op, ip, 16);
			op += length;
			ip += length;

			// The second stage: prepare for match copying, decode full info.
			// If it doesn't work out, the info won't be wasted.
			length = token & ML_MASK; // match length
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// Do not deal with overlapping matches.
			if ((length != ML_MASK) && (offset >= 8) && (match >= dst)) {
				// Copy the match.
				memcpy(op + 0, match + 0, 8);
				memcpy(op + 8, match + 8, 8);
				memcpy(op + 16, match + 16, 2);
				op += length + MINMATCH;
				// Both stages worked, load the next token.
				continue;
			}

			// The second stage didn't work out, but the info is ready.
			// Propel it right to the point of match copying.
			goto _copy_match;
		}

		// decode literal length
		if (length == RUN_MASK) {
			length += read_variable_length(&ip);
		}

		// copy literals
		cpy = op + length;
safe_literal_copy:
		if ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS))) {
			// We've either hit the input parsing restriction or the output parsing
			// restriction. In a valid stream this must be the last sequence, so copy
			// the remaining literals and stop.
			memmove(op, ip, length); // supports overlapping memory regions, which only matters for in-place decompression scenarios
			ip += length;
			op += length;
			break;
		} else {
			wildCopy8(op, ip, cpy); // may overwrite up to WILDCOPYLENGTH beyond cpy
			ip += length;
			op = cpy;
		}

		// get offset
		offset = Endian::read_UA_L16(ip);
		ip += 2;
		match = op - offset;

		// get matchlength
		length = token & ML_MASK;

_copy_match:
		if (length == ML_MASK) {
			length += read_variable_length(&ip);
		}
		length += MINMATCH;

safe_match_copy:
		// copy match within block
		cpy = op + length;

		if (unlikely(offset < 8)) {
			unalignedStore32(op, 0); // silence msan warning when offset == 0
			op[0] = match[0];
			op[1] = match[1];
			op[2] = match[2];
			op[3] = match[3];
			match += inc32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64table[offset];
		} else {
			memcpy(op, match, 8);
			match += 8;
		}
		op += 8;

		if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
			uint8_t* const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
			if (op < oCopyLimit) {
				wildCopy8(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}
			while (op < cpy) {
				*op++ = *match++;
			}
		} else {
			memcpy(op, match, 8);
			if (length > 16) {
				wildCopy8(op + 8, match + 8, cpy);
			}
		}
		op = cpy; // wildcopy correction
	}

	return int(op - dst); // Nb of output bytes decoded
}
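// Round-trip sketch (hypothetical caller, not part of this file): given
// 'compressed' data produced by LZ4::compress() from 'originalSize' input
// bytes, the caller passes the known original size as dstCapacity.
//
//   std::vector<uint8_t> restored(originalSize);
//   int n = LZ4::decompress(compressed.data(), restored.data(), compressedSize, originalSize);
//   assert(n == originalSize);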

} // namespace LZ4