lz4.cc
#include "lz4.hh"

#include "aligned.hh"
#include "endian.hh"
#include "inline.hh"
#include "likely.hh"
#include "unreachable.hh"

#include "build-info.hh"

#include <cstring>

#ifdef _MSC_VER
# include <intrin.h>
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
# pragma warning(disable : 4293) // disable: C4293: too large shift (32-bits)
#endif

// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64))
# define LZ4_ARCH64 1
#else
# define LZ4_ARCH64 0
#endif

namespace LZ4 {

static const int MEMORY_USAGE = 14;
static const int HASHLOG = MEMORY_USAGE - 2;
static const int HASH_SIZE_U32 = 1 << HASHLOG;
static const int ACCELERATION = 1;
static const int MINMATCH = 4;
static const int WILDCOPYLENGTH = 8;
static const int LASTLITERALS = 5; // see ../doc/lz4_Block_format.md#parsing-restrictions
static const int MFLIMIT = 12; // see ../doc/lz4_Block_format.md#parsing-restrictions
static const int MATCH_SAFEGUARD_DISTANCE = 2 * WILDCOPYLENGTH - MINMATCH; // ensure it's possible to write 2 x wildcopyLength without overflowing the output buffer
static const int FASTLOOP_SAFE_DISTANCE = 64;
static const int MIN_LENGTH = MFLIMIT + 1;
static const int DISTANCE_MAX = 65535;
static const int ML_BITS = 4;
static const int ML_MASK = (1 << ML_BITS) - 1;
static const int RUN_BITS = 8 - ML_BITS;
static const int RUN_MASK = (1 << RUN_BITS) - 1;
static const int LIMIT_64K = 0x10000 + (MFLIMIT - 1);
static const uint32_t SKIP_TRIGGER = 6; // increasing this value makes compression run slower on incompressible data
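
// Worked example of the token layout implied by the constants above (an
// explanatory sketch, not upstream code): every sequence starts with a one
// byte token holding the literal length in its upper RUN_BITS bits and the
// match length minus MINMATCH in its lower ML_BITS bits. Token 0x43 thus
// means 4 literals followed by a match of 3 + MINMATCH = 7 bytes; a field
// equal to its mask (RUN_MASK resp. ML_MASK) signals that extra length
// bytes follow. The static_asserts merely restate the definitions:
static_assert(ML_BITS + RUN_BITS == 8, "token must be exactly one byte");
static_assert((RUN_MASK == 15) && (ML_MASK == 15), "both length fields saturate at 15");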

using reg_t = uintptr_t;
static const int STEPSIZE = sizeof(reg_t);


static reg_t read_ARCH(const uint8_t* p)
{
	reg_t val;
	memcpy(&val, p, sizeof(val));
	return val;
}

// customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd
static void wildCopy8(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst, src, 8);
		dst += 8;
		src += 8;
	} while (dst < dstEnd);
}

// customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
// this version copies two times 16 bytes (instead of one time 32 bytes)
// because it must be compatible with offsets >= 16.
static void wildCopy32(uint8_t* dst, const uint8_t* src, uint8_t* dstEnd)
{
	do {
		memcpy(dst + 0, src + 0, 16);
		memcpy(dst + 16, src + 16, 16);
		dst += 32;
		src += 32;
	} while (dst < dstEnd);
}
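
// Callers of the wildCopy routines must reserve slack beyond the real end of
// the output. A sketch of the invariant, with 'dstCapacity' a hypothetical
// caller-side buffer size:
//
//   dstEnd + 8  <= dst + dstCapacity   // wildCopy8  may write 8 bytes past dstEnd
//   dstEnd + 32 <= dst + dstCapacity   // wildCopy32 may write 32 bytes past dstEnd
//
// The MFLIMIT/LASTLITERALS parsing restrictions of the block format exist so
// the decoder can uphold this invariant without per-byte bounds checks.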

static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
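
// How the two tables above enable overlapping copies with offset < 8 (an
// explanatory note): the first four bytes are copied one at a time, each read
// picking up bytes that were just written. inc32table[offset] then advances
// the source so the next 4-byte memcpy reads an already-complete part of the
// pattern, and dec64table[offset] rewinds it so the trailing wildCopy8 runs
// with an effective offset of at least 8 that is a multiple of the real
// offset, i.e. with non-overlapping 8-byte chunks. E.g. for offset == 2
// ("ababab..."), inc32table[2] == 2 and dec64table[2] == 0 leave the source
// exactly 8 bytes behind the destination after the first 8 output bytes.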

static void memcpy_using_offset_base(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, const size_t offset)
{
	if (offset < 8) {
		dstPtr[0] = srcPtr[0];
		dstPtr[1] = srcPtr[1];
		dstPtr[2] = srcPtr[2];
		dstPtr[3] = srcPtr[3];
		srcPtr += inc32table[offset];
		memcpy(dstPtr + 4, srcPtr, 4);
		srcPtr -= dec64table[offset];
		dstPtr += 8;
	} else {
		memcpy(dstPtr, srcPtr, 8);
		dstPtr += 8;
		srcPtr += 8;
	}

	wildCopy8(dstPtr, srcPtr, dstEnd);
}

// memcpy_using_offset() presumes:
// - dstEnd >= dstPtr + MINMATCH
// - there are at least 8 bytes available to write after dstEnd
static void memcpy_using_offset(uint8_t* dstPtr, const uint8_t* srcPtr, uint8_t* dstEnd, size_t offset)
{
	uint8_t v[8];

	unalignedStore32(dstPtr, 0); // silence an msan warning when offset==0

	switch (offset) {
	case 1:
		memset(v, *srcPtr, 8);
		break;
	case 2:
		memcpy(v, srcPtr, 2);
		memcpy(&v[2], srcPtr, 2);
		memcpy(&v[4], &v[0], 4);
		break;
	case 4:
		memcpy(v, srcPtr, 4);
		memcpy(&v[4], srcPtr, 4);
		break;
	default:
		memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
		return;
	}

	memcpy(dstPtr, v, 8);
	dstPtr += 8;
	while (dstPtr < dstEnd) {
		memcpy(dstPtr, v, 8);
		dstPtr += 8;
	}
}

static inline int NbCommonBytes(size_t val)
{
#if LZ4_ARCH64
	if (openmsx::OPENMSX_BIGENDIAN) {
#ifdef _MSC_VER
		unsigned long r;
		_BitScanReverse64(&r, val);
		return r >> 3;
#elif defined(__GNUC__)
		return __builtin_clzll(val) >> 3;
#else
		int r;
		if (!(val >> 32)) { r = 4; } else { r = 0; val >>= 32; }
		if (!(val >> 16)) { r += 2; val >>= 8; } else { val >>= 24; }
		r += !val;
		return r;
#endif
	} else {
#ifdef _MSC_VER
		unsigned long r;
		_BitScanForward64(&r, val);
		return r >> 3;
#elif defined(__GNUC__)
		return __builtin_ctzll(val) >> 3;
#else
		static const int DeBruijnBytePos[64] = {
			0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
			0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
			7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
			7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7,
		};
		return DeBruijnBytePos[(uint64_t((val & -val) * 0x0218A392CDABBD3F)) >> 58];
#endif
	}

#else // LZ4_ARCH64

	if (openmsx::OPENMSX_BIGENDIAN) {
#ifdef _MSC_VER
		unsigned long r;
		_BitScanReverse(&r, val);
		return r >> 3;
#elif defined(__GNUC__)
		return __builtin_clz(val) >> 3;
#else
		int r;
		if (!(val >> 16)) { r = 2; val >>= 8; } else { r = 0; val >>= 24; }
		r += !val;
		return r;
#endif
	} else {
#ifdef _MSC_VER
		unsigned long r;
		_BitScanForward(&r, val);
		return r >> 3;
#elif defined(__GNUC__)
		return __builtin_ctz(val) >> 3;
#else
		static const int DeBruijnBytePos[32] = {
			0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
			3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1,
		};
		return DeBruijnBytePos[(uint32_t((val & -val) * 0x077CB531U)) >> 27];
#endif
	}
#endif // LZ4_ARCH64
}
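
// Worked example (little-endian): count() below XORs two machine words, and
// the lowest set bit of the difference marks the first mismatching byte. For
// val == 0x0000000012000000 the three least significant bytes are zero, so
// __builtin_ctzll(val) == 25 and 25 >> 3 == 3 common bytes. The De Bruijn
// table fallbacks compute the same byte index without compiler intrinsics.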

ALWAYS_INLINE unsigned count(const uint8_t* pIn, const uint8_t* pMatch, const uint8_t* pInLimit)
{
	const uint8_t* const pStart = pIn;

	if (likely(pIn < pInLimit - (STEPSIZE - 1))) {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
		} else {
			return NbCommonBytes(diff);
		}
	}
	while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
		reg_t diff = read_ARCH(pMatch) ^ read_ARCH(pIn);
		if (!diff) {
			pIn += STEPSIZE;
			pMatch += STEPSIZE;
			continue;
		}
		pIn += NbCommonBytes(diff);
		return unsigned(pIn - pStart);
	}

	if ((STEPSIZE == 8) && (pIn < (pInLimit - 3)) && (unalignedLoad32(pMatch) == unalignedLoad32(pIn))) {
		pIn += 4;
		pMatch += 4;
	}
	if ((pIn < (pInLimit - 1)) && (unalignedLoad16(pMatch) == unalignedLoad16(pIn))) {
		pIn += 2;
		pMatch += 2;
	}
	if ((pIn < pInLimit) && (*pMatch == *pIn)) {
		pIn += 1;
	}
	return unsigned(pIn - pStart);
}
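
// Usage sketch: once the compressor has confirmed a 4-byte match at 'match',
// count(ip + MINMATCH, match + MINMATCH, matchlimit) measures how far the
// match extends beyond those first MINMATCH bytes. A 9-byte match yields 5,
// which is what ends up in the token's match-length field (and is decoded
// again as 5 + MINMATCH).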

template<bool L64K, bool ARCH64> struct HashImpl;

// byU16
template<bool ARCH64> struct HashImpl<true, ARCH64> {
	alignas(uint64_t) uint16_t tab[1 << (HASHLOG + 1)] = {};

	static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - (HASHLOG + 1));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = uint16_t(idx);
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint16_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byU32
template<> struct HashImpl<false, true> {
	alignas(uint64_t) uint32_t tab[1 << HASHLOG] = {};

	static uint32_t hashPosition(const uint8_t* p) {
		uint64_t sequence = read_ARCH(p);
		const uint64_t prime = openmsx::OPENMSX_BIGENDIAN
			? 11400714785074694791ULL // 8 bytes
			: 889523592379ULL; // 5 bytes
		return uint32_t(((sequence << 24) * prime) >> (64 - HASHLOG));
	}
	void putIndexOnHash(uint32_t idx, uint32_t h) {
		tab[h] = idx;
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* srcBase) {
		tab[h] = uint32_t(p - srcBase);
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	uint32_t getIndexOnHash(uint32_t h) const {
		return tab[h];
	}
	const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* srcBase) const {
		return tab[h] + srcBase;
	}
	const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};

// byPtr
template<> struct HashImpl<false, false> {
	alignas(uint64_t) const uint8_t* tab[1 << HASHLOG] = {};

	static uint32_t hashPosition(const uint8_t* p) {
		uint32_t sequence = unalignedLoad32(p);
		return (sequence * 2654435761U) >> ((MINMATCH * 8) - HASHLOG);
	}
	void putIndexOnHash(uint32_t /*idx*/, uint32_t /*h*/) {
		UNREACHABLE;
	}
	void putPositionOnHash(const uint8_t* p, uint32_t h, const uint8_t* /*srcBase*/) {
		tab[h] = p;
	}
	void putPosition(const uint8_t* p, const uint8_t* srcBase) {
		putPositionOnHash(p, hashPosition(p), srcBase);
	}
	uint32_t getIndexOnHash(uint32_t /*h*/) const {
		UNREACHABLE; return 0;
	}
	const uint8_t* getPositionOnHash(uint32_t h, const uint8_t* /*srcBase*/) const {
		return tab[h];
	}
	const uint8_t* getPosition(const uint8_t* p, const uint8_t* srcBase) const {
		return getPositionOnHash(hashPosition(p), srcBase);
	}
};
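
// Which specialization gets used (a summary; see compress() below):
// - HashImpl<true, *>      (byU16): inputs < 64 kB; every position fits in a
//   uint16_t index and no match offset can exceed DISTANCE_MAX.
// - HashImpl<false, true>  (byU32): larger inputs on 64-bit hosts; 32-bit
//   indices relative to 'src'.
// - HashImpl<false, false> (byPtr): larger inputs on 32-bit hosts; the table
//   stores raw pointers, so the index-based accessors are UNREACHABLE.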

template<bool L64K, bool ARCH64>
ALWAYS_INLINE int compress_impl(const uint8_t* src, uint8_t* const dst, const int inputSize)
{
	HashImpl<L64K, ARCH64> hashTable;

	const uint8_t* ip = src;
	uint8_t* op = dst;

	const uint8_t* anchor = src;
	const uint8_t* const iend = ip + inputSize;
	const uint8_t* const mflimitPlusOne = iend - MFLIMIT + 1;
	const uint8_t* const matchlimit = iend - LASTLITERALS;

	uint32_t forwardH;

	if (inputSize < MIN_LENGTH) goto _last_literals; // Input too small, no compression (all literals)

	// First byte
	hashTable.putPosition(ip, src);
	ip++;
	forwardH = hashTable.hashPosition(ip);

	while (true) {
		// Find a match
		const uint8_t* match;
		if (!L64K && !ARCH64) { // byPtr
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			do {
				uint32_t h = forwardH;
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;

				match = hashTable.getPositionOnHash(h, src);
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putPositionOnHash(ip, h, src);
			} while ((match + DISTANCE_MAX < ip) ||
			         (unalignedLoad32(match) != unalignedLoad32(ip)));

		} else { // byU16 or byU32
			const uint8_t* forwardIp = ip;
			int step = 1;
			int searchMatchNb = ACCELERATION << SKIP_TRIGGER;
			while (true) {
				uint32_t h = forwardH;
				uint32_t current = uint32_t(forwardIp - src);
				uint32_t matchIndex = hashTable.getIndexOnHash(h);
				ip = forwardIp;
				forwardIp += step;
				step = searchMatchNb++ >> SKIP_TRIGGER;

				if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;

				match = src + matchIndex;
				forwardH = hashTable.hashPosition(forwardIp);
				hashTable.putIndexOnHash(current, h);

				if (!L64K && (matchIndex + DISTANCE_MAX < current)) {
					continue; // too far
				}

				if (unalignedLoad32(match) == unalignedLoad32(ip)) {
					break; // match found
				}
			}
		}

		// Catch up
		while (((ip > anchor) & (match > src)) && (unlikely(ip[-1] == match[-1]))) {
			ip--;
			match--;
		}

		// Encode Literals
		unsigned litLength = unsigned(ip - anchor);
		uint8_t* token = op++;
		if (litLength >= RUN_MASK) {
			int len = int(litLength - RUN_MASK);
			*token = RUN_MASK << ML_BITS;
			while (len >= 255) {
				*op++ = 255;
				len -= 255;
			}
			*op++ = uint8_t(len);
		} else {
			*token = uint8_t(litLength << ML_BITS);
		}

		// Copy Literals
		wildCopy8(op, anchor, op + litLength);
		op += litLength;

_next_match:
		// At this stage, the following variables must be correctly set:
		// - ip : at the start of the LZ operation
		// - match : at the start of the previous occurrence of the pattern
		// - token and *token : position to write the 4-bit match length; the upper 4 bits (the literal length) are assumed to be already written

		// Encode Offset
		Endian::write_UA_L16(op, uint16_t(ip - match));
		op += 2;

		// Encode MatchLength
		unsigned matchCode = count(ip + MINMATCH, match + MINMATCH, matchlimit);
		ip += size_t(matchCode) + MINMATCH;

		if (matchCode >= ML_MASK) {
			*token += ML_MASK;
			matchCode -= ML_MASK;
			unalignedStore32(op, 0xFFFFFFFF);
			while (matchCode >= 4 * 255) {
				op += 4;
				unalignedStore32(op, 0xFFFFFFFF);
				matchCode -= 4 * 255;
			}
			op += matchCode / 255;
			*op++ = uint8_t(matchCode % 255);
		} else {
			*token += uint8_t(matchCode);
		}

		anchor = ip;

		// Test end of chunk
		if (ip >= mflimitPlusOne) break;

		// Fill table
		hashTable.putPosition(ip - 2, src);

		// Test next position
		if (!L64K && !ARCH64) { // byPtr
			match = hashTable.getPosition(ip, src);
			hashTable.putPosition(ip, src);
			if ((match + DISTANCE_MAX >= ip) && (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		} else { // byU16 or byU32
			uint32_t h = hashTable.hashPosition(ip);
			uint32_t current = uint32_t(ip - src);
			uint32_t matchIndex = hashTable.getIndexOnHash(h);
			match = src + matchIndex;
			hashTable.putIndexOnHash(current, h);
			if ((L64K || (matchIndex + DISTANCE_MAX >= current)) &&
			    (unalignedLoad32(match) == unalignedLoad32(ip))) {
				token = op++;
				*token = 0;
				goto _next_match;
			}
		}

		// Prepare next loop
		forwardH = hashTable.hashPosition(++ip);
	}

_last_literals:
	// Encode Last Literals
	size_t lastRun = size_t(iend - anchor);
	if (lastRun >= RUN_MASK) {
		size_t accumulator = lastRun - RUN_MASK;
		*op++ = RUN_MASK << ML_BITS;
		while (accumulator >= 255) {
			*op++ = 255;
			accumulator -= 255;
		}
		*op++ = uint8_t(accumulator);
	} else {
		*op++ = uint8_t(lastRun << ML_BITS);
	}
	memcpy(op, anchor, lastRun);
	ip = anchor + lastRun;
	op += lastRun;

	return int(op - dst);
}

int compress(const uint8_t* src, uint8_t* dst, int srcSize)
{
	if (srcSize < LIMIT_64K) {
		return compress_impl<true, LZ4_ARCH64>(src, dst, srcSize);
	} else {
		return compress_impl<false, LZ4_ARCH64>(src, dst, srcSize);
	}
}
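
// Illustrative usage (a sketch, not part of this file's API): the caller
// must size the destination for the worst case. The bound below mirrors
// upstream LZ4_compressBound(); treat the exact formula as an assumption.
//
//   std::vector<uint8_t> out(srcSize + srcSize / 255 + 16);
//   int packed = LZ4::compress(src, out.data(), srcSize);
//   out.resize(packed);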


static ALWAYS_INLINE unsigned read_variable_length(const uint8_t** ip)
{
	unsigned length = 0;
	unsigned s;
	do {
		s = **ip;
		(*ip)++;
		length += s;
	} while (s == 255);

	return length;
}
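
// Worked example: a length field that saturates at its 4-bit mask is
// extended with 255-valued bytes plus a terminating byte. A literal run of
// 284 bytes stores RUN_MASK (15) in the token, followed by the bytes 255 and
// 14 in the stream: read_variable_length() returns 255 + 14 == 269 extra
// bytes, and 15 + 269 == 284.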

int decompress(const uint8_t* src, uint8_t* dst, int srcSize, int outputSize)
{
	const uint8_t* ip = src;
	const uint8_t* const iend = ip + srcSize;

	uint8_t* op = dst;
	uint8_t* const oend = op + outputSize;
	uint8_t* cpy;

	// Set up the "end" pointers for the shortcut.
	const uint8_t* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
	const uint8_t* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;

	const uint8_t* match;
	size_t offset;
	unsigned token;
	size_t length;

	if ((oend - op) >= FASTLOOP_SAFE_DISTANCE) {
		// Fast loop: decode sequences as long as output < oend - FASTLOOP_SAFE_DISTANCE
		while (true) {
			// Main fastloop assertion: we can always wildcopy FASTLOOP_SAFE_DISTANCE
			token = *ip++;
			length = token >> ML_BITS; // literal length

			// decode literal length
			if (length == RUN_MASK) {
				length += read_variable_length(&ip);

				// copy literals
				cpy = op + length;
				if ((cpy > oend - 32) || (ip + length > iend - 32)) {
					goto safe_literal_copy;
				}
				wildCopy32(op, ip, cpy);
				ip += length;
				op = cpy;
			} else {
				cpy = op + length;
				// We don't need to check oend, since we check it once for each loop below
				if (ip > iend - (16 + 1/*max lit + offset + nextToken*/)) {
					goto safe_literal_copy;
				}
				// Literals can only be up to 14 bytes, but we hope compilers optimize better when we copy by a register size
				memcpy(op, ip, 16);
				ip += length;
				op = cpy;
			}

			// get offset
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// get matchlength
			length = token & ML_MASK;

			if (length == ML_MASK) {
				length += read_variable_length(&ip);
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}
			} else {
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}

				// Fastpath check: avoids a branch in wildCopy32 if true
				if (match >= dst) {
					if (offset >= 8) {
						memcpy(op + 0, match + 0, 8);
						memcpy(op + 8, match + 8, 8);
						memcpy(op + 16, match + 16, 2);
						op += length;
						continue;
					}
				}
			}

			// copy match within block
			cpy = op + length;

			if (unlikely(offset < 16)) {
				memcpy_using_offset(op, match, cpy, offset);
			} else {
				wildCopy32(op, match, cpy);
			}

			op = cpy; // wildcopy correction
		}
	}

	// Main loop: decode the remaining sequences, where fewer than
	// FASTLOOP_SAFE_DISTANCE output bytes may be left
	while (true) {
		token = *ip++;
		length = token >> ML_BITS; // literal length

		// A two-stage shortcut for the most common case:
		// 1) If the literal length is 0..14, and there is enough space,
		// enter the shortcut and copy 16 bytes on behalf of the literals
		// (in the fast mode, only 8 bytes can be safely copied this way).
		// 2) Further, if the match length is 4..18, copy 18 bytes in a similar
		// manner; but we ensure that there's enough space in the output for
		// those 18 bytes earlier, upon entering the shortcut (in other words,
		// there is a combined check for both stages).
		if ((length != RUN_MASK) &&
		    // strictly "less than" on input, to re-enter the loop with at least one byte
		    likely((ip < shortiend) & (op <= shortoend))) {
			// Copy the literals
			memcpy(op, ip, 16);
			op += length;
			ip += length;

			// The second stage: prepare for match copying, decode full info.
			// If it doesn't work out, the info won't be wasted.
			length = token & ML_MASK; // match length
			offset = Endian::read_UA_L16(ip);
			ip += 2;
			match = op - offset;

			// Do not deal with overlapping matches.
			if ((length != ML_MASK) && (offset >= 8) && (match >= dst)) {
				// Copy the match.
				memcpy(op + 0, match + 0, 8);
				memcpy(op + 8, match + 8, 8);
				memcpy(op + 16, match + 16, 2);
				op += length + MINMATCH;
				// Both stages worked, load the next token.
				continue;
			}

			// The second stage didn't work out, but the info is ready.
			// Propel it right to the point of match copying.
			goto _copy_match;
		}

		// decode literal length
		if (length == RUN_MASK) {
			length += read_variable_length(&ip);
		}

		// copy literals
		cpy = op + length;
safe_literal_copy:
		if ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS))) {
			// We've hit either the input parsing restriction or the
			// output parsing restriction; in both cases this must be
			// the last sequence of the block.
			memmove(op, ip, length); // supports overlapping memory regions, which only matters for in-place decompression scenarios
			ip += length;
			op += length;
			break;
		} else {
			wildCopy8(op, ip, cpy); // may overwrite up to WILDCOPYLENGTH beyond cpy
			ip += length;
			op = cpy;
		}

		// get offset
		offset = Endian::read_UA_L16(ip);
		ip += 2;
		match = op - offset;

		// get matchlength
		length = token & ML_MASK;

_copy_match:
		if (length == ML_MASK) {
			length += read_variable_length(&ip);
		}
		length += MINMATCH;

safe_match_copy:
		// copy match within block
		cpy = op + length;

		if (unlikely(offset < 8)) {
			unalignedStore32(op, 0); // silence msan warning when offset == 0
			op[0] = match[0];
			op[1] = match[1];
			op[2] = match[2];
			op[3] = match[3];
			match += inc32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64table[offset];
		} else {
			memcpy(op, match, 8);
			match += 8;
		}
		op += 8;

		if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
			uint8_t* const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
			if (op < oCopyLimit) {
				wildCopy8(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}
			while (op < cpy) {
				*op++ = *match++;
			}
		} else {
			memcpy(op, match, 8);
			if (length > 16) {
				wildCopy8(op + 8, match + 8, cpy);
			}
		}
		op = cpy; // wildcopy correction
	}

	return int(op - dst); // Nb of output bytes decoded
}
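
// Illustrative usage (a sketch; 'originalSize' must be known to the caller,
// e.g. stored next to the block, because the raw block format does not
// carry it):
//
//   std::vector<uint8_t> raw(originalSize);
//   int n = LZ4::decompress(packed.data(), raw.data(),
//                           packedSize, originalSize);
//   assert(n == int(originalSize));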

} // namespace LZ4