openMSX
LineScalers.hh
Go to the documentation of this file.
1 #ifndef LINESCALERS_HH
2 #define LINESCALERS_HH
3 
4 #include "PixelOperations.hh"
5 #include "likely.hh"
6 #include "xrange.hh"
7 #include <type_traits>
8 #include <cstddef>
9 #include <cstring>
10 #include <cassert>
11 #ifdef __SSE2__
12 #include "emmintrin.h"
13 #endif
14 #ifdef __SSSE3__
15 #include "tmmintrin.h"
16 #endif
17 
18 namespace openmsx {
19 
20 // Tag classes
/** Tag type: a scaler that derives from this is a plain copy operation. */
struct TagCopy
{
};
/** Compile-time check: is CLASS derived from (or the same as) TAG?
  * Exposes the answer as the usual `::value` member. */
template<typename CLASS, typename TAG>
struct IsTagged : std::is_base_of<TAG, CLASS>
{
};
24 
25 
26 // Scalers
27 
/** Line scaler that writes every input pixel to 3 consecutive output
  * pixels. 'width' is the number of output pixels. */
template<typename Pixel>
class Scale_1on3
{
public:
	void operator()(const Pixel* in, Pixel* out, size_t width);
};
40 
/** Line scaler that writes every input pixel to 4 consecutive output
  * pixels. 'width' is the number of output pixels. */
template<typename Pixel>
class Scale_1on4
{
public:
	void operator()(const Pixel* in, Pixel* out, size_t width);
};
46 
/** Line scaler that writes every input pixel to 6 consecutive output
  * pixels. 'width' is the number of output pixels. */
template<typename Pixel>
class Scale_1on6
{
public:
	void operator()(const Pixel* in, Pixel* out, size_t width);
};
52 
/** Line scaler that outputs every input pixel twice.
  * 'width' is the number of output pixels. */
template<typename Pixel>
class Scale_1on2
{
public:
	void operator()(const Pixel* in, Pixel* out, size_t width);
};
58 
59 template<typename Pixel> class Scale_1on1 : public TagCopy
60 {
61 public:
62  void operator()(const Pixel* in, Pixel* out, size_t width);
63 };
64 
65 template<typename Pixel> class Scale_2on1
66 {
67 public:
68  explicit Scale_2on1(PixelOperations<Pixel> pixelOps);
69  void operator()(const Pixel* in, Pixel* out, size_t width);
70 private:
71  PixelOperations<Pixel> pixelOps;
72 };
73 
74 template<typename Pixel> class Scale_6on1
75 {
76 public:
77  explicit Scale_6on1(PixelOperations<Pixel> pixelOps);
78  void operator()(const Pixel* in, Pixel* out, size_t width);
79 private:
80  PixelOperations<Pixel> pixelOps;
81 };
82 
83 template<typename Pixel> class Scale_4on1
84 {
85 public:
86  explicit Scale_4on1(PixelOperations<Pixel> pixelOps);
87  void operator()(const Pixel* in, Pixel* out, size_t width);
88 private:
89  PixelOperations<Pixel> pixelOps;
90 };
91 
92 template<typename Pixel> class Scale_3on1
93 {
94 public:
95  explicit Scale_3on1(PixelOperations<Pixel> pixelOps);
96  void operator()(const Pixel* in, Pixel* out, size_t width);
97 private:
98  PixelOperations<Pixel> pixelOps;
99 };
100 
101 template<typename Pixel> class Scale_3on2
102 {
103 public:
104  explicit Scale_3on2(PixelOperations<Pixel> pixelOps);
105  void operator()(const Pixel* in, Pixel* out, size_t width);
106 private:
107  PixelOperations<Pixel> pixelOps;
108 };
109 
110 template<typename Pixel> class Scale_3on4
111 {
112 public:
113  explicit Scale_3on4(PixelOperations<Pixel> pixelOps);
114  void operator()(const Pixel* in, Pixel* out, size_t width);
115 private:
116  PixelOperations<Pixel> pixelOps;
117 };
118 
119 template<typename Pixel> class Scale_3on8
120 {
121 public:
122  explicit Scale_3on8(PixelOperations<Pixel> pixelOps);
123  void operator()(const Pixel* in, Pixel* out, size_t width);
124 private:
125  PixelOperations<Pixel> pixelOps;
126 };
127 
128 template<typename Pixel> class Scale_2on3
129 {
130 public:
131  explicit Scale_2on3(PixelOperations<Pixel> pixelOps);
132  void operator()(const Pixel* in, Pixel* out, size_t width);
133 private:
134  PixelOperations<Pixel> pixelOps;
135 };
136 
137 template<typename Pixel> class Scale_4on3
138 {
139 public:
140  explicit Scale_4on3(PixelOperations<Pixel> pixelOps);
141  void operator()(const Pixel* in, Pixel* out, size_t width);
142 private:
143  PixelOperations<Pixel> pixelOps;
144 };
145 
146 template<typename Pixel> class Scale_8on3
147 {
148 public:
149  explicit Scale_8on3(PixelOperations<Pixel> pixelOps);
150  void operator()(const Pixel* in, Pixel* out, size_t width);
151 private:
152  PixelOperations<Pixel> pixelOps;
153 };
154 
155 template<typename Pixel> class Scale_2on9
156 {
157 public:
158  explicit Scale_2on9(PixelOperations<Pixel> pixelOps);
159  void operator()(const Pixel* in, Pixel* out, size_t width);
160 private:
161  PixelOperations<Pixel> pixelOps;
162 };
163 
164 template<typename Pixel> class Scale_4on9
165 {
166 public:
167  explicit Scale_4on9(PixelOperations<Pixel> pixelOps);
168  void operator()(const Pixel* in, Pixel* out, size_t width);
169 private:
170  PixelOperations<Pixel> pixelOps;
171 };
172 
173 template<typename Pixel> class Scale_8on9
174 {
175 public:
176  explicit Scale_8on9(PixelOperations<Pixel> pixelOps);
177  void operator()(const Pixel* in, Pixel* out, size_t width);
178 private:
179  PixelOperations<Pixel> pixelOps;
180 };
181 
182 template<typename Pixel> class Scale_4on5
183 {
184 public:
185  explicit Scale_4on5(PixelOperations<Pixel> pixelOps);
186  void operator()(const Pixel* in, Pixel* out, size_t width);
187 private:
188  PixelOperations<Pixel> pixelOps;
189 };
190 
191 template<typename Pixel> class Scale_7on8
192 {
193 public:
194  explicit Scale_7on8(PixelOperations<Pixel> pixelOps);
195  void operator()(const Pixel* in, Pixel* out, size_t width);
196 private:
197  PixelOperations<Pixel> pixelOps;
198 };
199 
200 template<typename Pixel> class Scale_17on20
201 {
202 public:
203  explicit Scale_17on20(PixelOperations<Pixel> pixelOps);
204  void operator()(const Pixel* in, Pixel* out, size_t width);
205 private:
206  PixelOperations<Pixel> pixelOps;
207 };
208 
209 template<typename Pixel> class Scale_9on10
210 {
211 public:
212  explicit Scale_9on10(PixelOperations<Pixel> pixelOps);
213  void operator()(const Pixel* in, Pixel* out, size_t width);
214 private:
215  PixelOperations<Pixel> pixelOps;
216 };
217 
218 
226 template<typename Pixel, unsigned w1 = 1, unsigned w2 = 1> class BlendLines
227 {
228 public:
229  explicit BlendLines(PixelOperations<Pixel> pixelOps);
230  void operator()(const Pixel* in1, const Pixel* in2,
231  Pixel* out, size_t width);
232 private:
233  PixelOperations<Pixel> pixelOps;
234 };
235 
238 template<typename Pixel>
239 class ZoomLine
240 {
241 public:
242  explicit ZoomLine(PixelOperations<Pixel> pixelOps);
243  void operator()(const Pixel* in, unsigned inWidth,
244  Pixel* out, unsigned outWidth) const;
245 private:
246  PixelOperations<Pixel> pixelOps;
247 };
248 
249 
258 template<typename Pixel> class AlphaBlendLines
259 {
260 public:
261  explicit AlphaBlendLines(PixelOperations<Pixel> pixelOps);
262  void operator()(const Pixel* in1, const Pixel* in2,
263  Pixel* out, size_t width);
264  void operator()(Pixel in1, const Pixel* in2,
265  Pixel* out, size_t width);
266 private:
267  PixelOperations<Pixel> pixelOps;
268 };
269 
270 
/** Polymorphic line scaler.
  * Abstract interface through which a line scaler can be invoked without
  * knowing its concrete type. */
template<typename Pixel>
class PolyLineScaler
{
public:
	/** Actually scale a line.
	  * @param in Pointer to the input pixels.
	  * @param out Pointer to the output pixels.
	  * @param outWidth Number of pixels to produce in 'out'.
	  */
	virtual void operator()(const Pixel* in, Pixel* out, size_t outWidth) = 0;

	/** Is this scale operation actually a copy? */
	[[nodiscard]] virtual bool isCopy() const = 0;

protected:
	// Non-virtual and protected: objects are not meant to be destroyed
	// through a pointer to this interface.
	~PolyLineScaler() = default;
};
307 
311 template<typename Pixel, typename Scaler>
312 class PolyScale final : public PolyLineScaler<Pixel>
313 {
314 public:
316  : scaler()
317  {
318  }
320  : scaler(pixelOps)
321  {
322  }
323  void operator()(const Pixel* in, Pixel* out, size_t outWidth) override
324  {
325  scaler(in, out, outWidth);
326  }
327  [[nodiscard]] bool isCopy() const override
328  {
330  }
331 private:
332  Scaler scaler;
333 };
334 
338 template<typename Pixel, typename Scaler>
339 class PolyScaleRef final : public PolyLineScaler<Pixel>
340 {
341 public:
342  explicit PolyScaleRef(Scaler& scaler_)
343  : scaler(scaler_)
344  {
345  }
346  void operator()(const Pixel* in, Pixel* out, size_t outWidth) override
347  {
348  scaler(in, out, outWidth);
349  }
350  [[nodiscard]] bool isCopy() const override
351  {
353  }
354 private:
355  Scaler& scaler;
356 };
357 
358 
359 // implementation
360 
/** Replicate each input pixel N times.
  * @param in Input pixels; one pixel is read per complete group of N
  *           output pixels.
  * @param out Output pixels.
  * @param width Number of output pixels. Output pixels that do not form a
  *              complete group of N at the end of the line are set to 0.
  */
template<typename Pixel, unsigned N>
static inline void scale_1onN(
	const Pixel* __restrict in, Pixel* __restrict out, size_t width)
{
	size_t i = 0, j = 0;
	// Compare against the remaining width instead of computing
	// 'width - (N - 1)': that subtraction wraps around for unsigned
	// 'size_t' when width < N - 1 and made the loop run out of bounds.
	for (/* */; (width - i) >= N; i += N, j += 1) {
		const Pixel pix = in[j];
		for (unsigned k = 0; k < N; ++k) {
			out[i + k] = pix;
		}
	}
	// Clear the (at most N - 1) trailing pixels of an incomplete group.
	for (/* */; i < width; ++i) {
		out[i] = 0;
	}
}
376 
377 template<typename Pixel>
378 void Scale_1on3<Pixel>::operator()(const Pixel* in, Pixel* out, size_t width)
379 {
380  scale_1onN<Pixel, 3>(in, out, width);
381 }
382 
383 template<typename Pixel>
384 void Scale_1on4<Pixel>::operator()(const Pixel* in, Pixel* out, size_t width)
385 {
386  scale_1onN<Pixel, 4>(in, out, width);
387 }
388 
389 template<typename Pixel>
390 void Scale_1on6<Pixel>::operator()(const Pixel* in, Pixel* out, size_t width)
391 {
392  scale_1onN<Pixel, 6>(in, out, width);
393 }
394 
395 #ifdef __SSE2__
396 template<typename Pixel> inline __m128i unpacklo(__m128i x, __m128i y)
397 {
398  if (sizeof(Pixel) == 4) {
399  return _mm_unpacklo_epi32(x, y);
400  } else if (sizeof(Pixel) == 2) {
401  return _mm_unpacklo_epi16(x, y);
402  } else {
403  UNREACHABLE;
404  }
405 }
406 template<typename Pixel> inline __m128i unpackhi(__m128i x, __m128i y)
407 {
408  if (sizeof(Pixel) == 4) {
409  return _mm_unpackhi_epi32(x, y);
410  } else if (sizeof(Pixel) == 2) {
411  return _mm_unpackhi_epi16(x, y);
412  } else {
413  UNREACHABLE;
414  }
415 }
416 
417 template<typename Pixel>
418 inline void scale_1on2_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth)
419 {
420  size_t bytes = srcWidth * sizeof(Pixel);
421  assert((bytes % (4 * sizeof(__m128i))) == 0);
422  assert(bytes != 0);
423 
424  const auto* in = reinterpret_cast<const char*>(in_) + bytes;
425  auto* out = reinterpret_cast< char*>(out_) + 2 * bytes;
426 
427  auto x = -ptrdiff_t(bytes);
428  do {
429  __m128i a0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 0));
430  __m128i a1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 16));
431  __m128i a2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 32));
432  __m128i a3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 48));
433  __m128i l0 = unpacklo<Pixel>(a0, a0);
434  __m128i h0 = unpackhi<Pixel>(a0, a0);
435  __m128i l1 = unpacklo<Pixel>(a1, a1);
436  __m128i h1 = unpackhi<Pixel>(a1, a1);
437  __m128i l2 = unpacklo<Pixel>(a2, a2);
438  __m128i h2 = unpackhi<Pixel>(a2, a2);
439  __m128i l3 = unpacklo<Pixel>(a3, a3);
440  __m128i h3 = unpackhi<Pixel>(a3, a3);
441  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 0), l0);
442  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 16), h0);
443  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 32), l1);
444  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 48), h1);
445  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 64), l2);
446  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 80), h2);
447  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 96), l3);
448  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 112), h3);
449  x += 4 * sizeof(__m128i);
450  } while (x < 0);
451 }
452 #endif
453 
454 template<typename Pixel>
456  const Pixel* __restrict in, Pixel* __restrict out, size_t dstWidth)
457 {
458  // This is a fairly simple algorithm (output each input pixel twice).
459  // An ideal compiler should generate optimal (vector) code for it.
460  // I checked the 2013-05-29 dev snapshots of gcc-4.9 and clang-3.4:
461  // - Clang is not able to vectorize this loop. My best tuned C version
462  // of this routine is a little over 4x slower than the tuned
463  // SSE-intrinsics version.
464  // - Gcc can auto-vectorize this routine. Though my best tuned version
465  // (I mean tuned to further improve the auto-vectorization, including
466  // using the new __builtin_assume_aligned() intrinsic) still runs
467  // approx 40% slower than the intrinsics version.
468  // Hopefully in some years the compilers have improved further so that
469  // the intrinsic version is no longer needed.
470  size_t srcWidth = dstWidth / 2;
471 
472 #ifdef __SSE2__
473  size_t chunk = 4 * sizeof(__m128i) / sizeof(Pixel);
474  size_t srcWidth2 = srcWidth & ~(chunk - 1);
475  scale_1on2_SSE(in, out, srcWidth2);
476  in += srcWidth2;
477  out += 2 * srcWidth2;
478  srcWidth -= srcWidth2;
479 #endif
480 
481  // C++ version. Used both on non-x86 machines and (possibly) on x86 for
482  // the last few pixels of the line.
483  for (auto x : xrange(srcWidth)) {
484  out[x * 2] = out[x * 2 + 1] = in[x];
485  }
486 }
487 
488 #ifdef __SSE2__
// Specialized memcpy(). It can be faster than a generic memcpy because of
// its stricter preconditions (all of them are asserted):
// - Both input and output must be 16-byte aligned.
// - 'size' must be a non-zero integer multiple of 128 bytes.
inline void memcpy_SSE_128(
	const void* __restrict in_, void* __restrict out_, size_t size)
{
	assert((reinterpret_cast<size_t>(in_ ) % 16) == 0);
	assert((reinterpret_cast<size_t>(out_) % 16) == 0);
	assert((size % 128) == 0);
	assert(size != 0);

	const auto* src = static_cast<const __m128i*>(in_);
	auto* dst = static_cast<__m128i*>(out_);
	const auto* const srcEnd = src + (size / sizeof(__m128i));
	do {
		// one iteration moves 8 vectors == 128 bytes
		for (int k = 0; k < 8; ++k) {
			dst[k] = src[k];
		}
		src += 8;
		dst += 8;
	} while (src != srcEnd);
}
516 #endif
517 
518 template<typename Pixel>
520  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
521 {
522  size_t nBytes = width * sizeof(Pixel);
523 
524 #ifdef __SSE2__
525  // When using a very recent gcc/clang, this routine is only about
526  // 10% faster than a simple memcpy(). When using gcc-4.6 (still the
527  // default on many systems), it's still about 66% faster.
528  size_t n128 = nBytes & ~127;
529  memcpy_SSE_128(in, out, n128); // copy 128 byte chunks
530  nBytes &= 127; // remaning bytes (if any)
531  if (likely(nBytes == 0)) return;
532  in += n128 / sizeof(Pixel);
533  out += n128 / sizeof(Pixel);
534 #endif
535 
536  memcpy(out, in, nBytes);
537 }
538 
539 
540 template<typename Pixel>
542  : pixelOps(pixelOps_)
543 {
544 }
545 
546 #ifdef __SSE2__
// Integer-vector wrapper around _mm_shuffle_ps(): the two lower 32-bit
// elements of the result are selected from 'x', the two upper ones from
// 'y', as encoded in IMM8.
template<int IMM8> static inline __m128i shuffle(__m128i x, __m128i y)
{
	const __m128 xf = _mm_castsi128_ps(x);
	const __m128 yf = _mm_castsi128_ps(y);
	return _mm_castps_si128(_mm_shuffle_ps(xf, yf, IMM8));
}
552 
553 template<typename Pixel>
554 inline __m128i blend(__m128i x, __m128i y, Pixel mask)
555 {
556  if (sizeof(Pixel) == 4) {
557  // 32bpp
558  __m128i p = shuffle<0x88>(x, y);
559  __m128i q = shuffle<0xDD>(x, y);
560  return _mm_avg_epu8(p, q);
561  } else {
562  // 16bpp, first shuffle odd/even pixels in the right position
563 #ifdef __SSSE3__
564  // This can be done faster using SSSE3
565  const __m128i LL = _mm_set_epi8(
566  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
567  0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00);
568  const __m128i HL = _mm_set_epi8(
569  0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00,
570  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
571  const __m128i LH = _mm_set_epi8(
572  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
573  0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02);
574  const __m128i HH = _mm_set_epi8(
575  0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02,
576  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
577  __m128i ll = _mm_shuffle_epi8(x, LL);
578  __m128i hl = _mm_shuffle_epi8(y, HL);
579  __m128i lh = _mm_shuffle_epi8(x, LH);
580  __m128i hh = _mm_shuffle_epi8(y, HH);
581  __m128i p = _mm_or_si128(ll, hl);
582  __m128i q = _mm_or_si128(lh, hh);
583 #else
584  // For SSE2 this only generates 1 instruction more, but with
585  // longer dependency chains
586  __m128i s = _mm_unpacklo_epi16(x, y);
587  __m128i t = _mm_unpackhi_epi16(x, y);
588  __m128i u = _mm_unpacklo_epi16(s, t);
589  __m128i v = _mm_unpackhi_epi16(s, t);
590  __m128i p = _mm_unpacklo_epi16(u, v);
591  __m128i q = _mm_unpackhi_epi16(u, v);
592 #endif
593  // Actually blend: (p & q) + (((p ^ q) & mask) >> 1)
594  __m128i m = _mm_set1_epi16(mask);
595  __m128i a = _mm_and_si128(p, q);
596  __m128i b = _mm_xor_si128(p, q);
597  __m128i c = _mm_and_si128(b, m);
598  __m128i d = _mm_srli_epi16(c, 1);
599  return _mm_add_epi16(a, d);
600  }
601 }
602 
// SSE2 version of the 2->1 scaler. Produces 'dstBytes' bytes of output
// from twice as many input bytes, 64 output bytes per iteration.
// Preconditions (asserted): 'dstBytes' is a non-zero multiple of 64.
// 'mask' is passed on to blend().
template<typename Pixel>
inline void scale_2on1_SSE(
	const Pixel* __restrict in_, Pixel* __restrict out_, size_t dstBytes,
	Pixel mask)
{
	assert((dstBytes % (4 * sizeof(__m128i))) == 0);
	assert(dstBytes != 0);

	// Point both cursors at the END of their buffers and walk a negative
	// offset up to zero.
	const auto* srcEnd = reinterpret_cast<const char*>(in_) + 2 * dstBytes;
	auto* dstEnd = reinterpret_cast<char*>(out_) + dstBytes;

	auto off = -ptrdiff_t(dstBytes);
	do {
		const auto* src = reinterpret_cast<const __m128i*>(srcEnd + 2 * off);
		auto* dst = reinterpret_cast<__m128i*>(dstEnd + off);

		const __m128i a0 = _mm_loadu_si128(src + 0);
		const __m128i a1 = _mm_loadu_si128(src + 1);
		const __m128i a2 = _mm_loadu_si128(src + 2);
		const __m128i a3 = _mm_loadu_si128(src + 3);
		const __m128i a4 = _mm_loadu_si128(src + 4);
		const __m128i a5 = _mm_loadu_si128(src + 5);
		const __m128i a6 = _mm_loadu_si128(src + 6);
		const __m128i a7 = _mm_loadu_si128(src + 7);

		// Two input vectors collapse into one output vector.
		_mm_storeu_si128(dst + 0, blend(a0, a1, mask));
		_mm_storeu_si128(dst + 1, blend(a2, a3, mask));
		_mm_storeu_si128(dst + 2, blend(a4, a5, mask));
		_mm_storeu_si128(dst + 3, blend(a6, a7, mask));

		off += 4 * sizeof(__m128i);
	} while (off < 0);
}
635 #endif
636 
637 template<typename Pixel>
639  const Pixel* __restrict in, Pixel* __restrict out, size_t dstWidth)
640 {
641 #ifdef __SSE2__
642  size_t n64 = (dstWidth * sizeof(Pixel)) & ~63;
643  Pixel mask = pixelOps.getBlendMask();
644  scale_2on1_SSE(in, out, n64, mask); // process 64 byte chunks
645  dstWidth &= ((64 / sizeof(Pixel)) - 1); // remaning pixels (if any)
646  if (likely(dstWidth == 0)) return;
647  in += (2 * n64) / sizeof(Pixel);
648  out += n64 / sizeof(Pixel);
649 #endif
650 
651  // pure C++ version
652  for (auto i : xrange(dstWidth)) {
653  out[i] = pixelOps.template blend<1, 1>(
654  in[2 * i + 0], in[2 * i + 1]);
655  }
656 }
657 
658 
659 template<typename Pixel>
661  : pixelOps(pixelOps_)
662 {
663 }
664 
665 template<typename Pixel>
667  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
668 {
669  for (auto i : xrange(width)) {
670  out[i] = pixelOps.template blend6<1, 1, 1, 1, 1, 1>(&in[6 * i]);
671  }
672 }
673 
674 
675 template<typename Pixel>
677  : pixelOps(pixelOps_)
678 {
679 }
680 
681 template<typename Pixel>
683  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
684 {
685  for (auto i : xrange(width)) {
686  out[i] = pixelOps.template blend4<1, 1, 1, 1>(&in[4 * i]);
687  }
688 }
689 
690 
691 template<typename Pixel>
693  : pixelOps(pixelOps_)
694 {
695 }
696 
697 template<typename Pixel>
699  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
700 {
701  for (auto i : xrange(width)) {
702  out[i] = pixelOps.template blend3<1, 1, 1>(&in[3 * i]);
703  }
704 }
705 
706 
707 template<typename Pixel>
709  : pixelOps(pixelOps_)
710 {
711 }
712 
713 template<typename Pixel>
715  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
716 {
717  size_t i = 0, j = 0;
718  for (/* */; i < (width - 1); i += 2, j += 3) {
719  out[i + 0] = pixelOps.template blend2<2, 1>(&in[j + 0]);
720  out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 1]);
721  }
722  if (i < width) out[i] = 0;
723 }
724 
725 
726 template<typename Pixel>
728  : pixelOps(pixelOps_)
729 {
730 }
731 
732 template<typename Pixel>
734  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
735 {
736  size_t i = 0, j = 0;
737  for (/* */; i < (width - 3); i += 4, j += 3) {
738  out[i + 0] = in[j + 0];
739  out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 0]);
740  out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 1]);
741  out[i + 3] = in[j + 2];
742  }
743  for (auto k : xrange(4 - 1)) {
744  if ((i + k) < width) out[i + k] = 0;
745  }
746 }
747 
748 
749 template<typename Pixel>
751  : pixelOps(pixelOps_)
752 {
753 }
754 
755 template<typename Pixel>
757  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
758 {
759  size_t i = 0, j = 0;
760  for (/* */; i < (width - 7); i += 8, j += 3) {
761  out[i + 0] = in[j + 0];
762  out[i + 1] = in[j + 0];
763  out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 0]);
764  out[i + 3] = in[j + 1];
765  out[i + 4] = in[j + 1];
766  out[i + 5] = pixelOps.template blend2<1, 2>(&in[j + 1]);
767  out[i + 6] = in[j + 2];
768  out[i + 7] = in[j + 2];
769  }
770  for (auto k : xrange(8 - 1)) {
771  if ((i + k) < width) out[i + k] = 0;
772  }
773 }
774 
775 
776 template<typename Pixel>
778  : pixelOps(pixelOps_)
779 {
780 }
781 
782 template<typename Pixel>
784  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
785 {
786  size_t i = 0, j = 0;
787  for (/* */; i < (width - 2); i += 3, j += 2) {
788  out[i + 0] = in[j + 0];
789  out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 0]);
790  out[i + 2] = in[j + 1];
791  }
792  if ((i + 0) < width) out[i + 0] = 0;
793  if ((i + 1) < width) out[i + 1] = 0;
794 }
795 
796 
797 template<typename Pixel>
799  : pixelOps(pixelOps_)
800 {
801 }
802 
803 template<typename Pixel>
805  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
806 {
807  size_t i = 0, j = 0;
808  for (/* */; i < (width - 2); i += 3, j += 4) {
809  out[i + 0] = pixelOps.template blend2<3, 1>(&in[j + 0]);
810  out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 1]);
811  out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 2]);
812  }
813  if ((i + 0) < width) out[i + 0] = 0;
814  if ((i + 1) < width) out[i + 1] = 0;
815 }
816 
817 
818 template<typename Pixel>
820  : pixelOps(pixelOps_)
821 {
822 }
823 
824 template<typename Pixel>
826  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
827 {
828  size_t i = 0, j = 0;
829  for (/* */; i < (width - 2); i += 3, j += 8) {
830  out[i + 0] = pixelOps.template blend3<3, 3, 2> (&in[j + 0]);
831  out[i + 1] = pixelOps.template blend4<1, 3, 3, 1>(&in[j + 2]);
832  out[i + 2] = pixelOps.template blend3<2, 3, 3> (&in[j + 5]);
833  }
834  if ((i + 0) < width) out[i + 0] = 0;
835  if ((i + 1) < width) out[i + 1] = 0;
836 }
837 
838 
839 template<typename Pixel>
841  : pixelOps(pixelOps_)
842 {
843 }
844 
845 template<typename Pixel>
847  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
848 {
849  size_t i = 0, j = 0;
850  for (/* */; i < (width - 8); i += 9, j += 2) {
851  out[i + 0] = in[j + 0];
852  out[i + 1] = in[j + 0];
853  out[i + 2] = in[j + 0];
854  out[i + 3] = in[j + 0];
855  out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 0]);
856  out[i + 5] = in[j + 1];
857  out[i + 6] = in[j + 1];
858  out[i + 7] = in[j + 1];
859  out[i + 8] = in[j + 1];
860  }
861  if ((i + 0) < width) out[i + 0] = 0;
862  if ((i + 1) < width) out[i + 1] = 0;
863  if ((i + 2) < width) out[i + 2] = 0;
864  if ((i + 3) < width) out[i + 3] = 0;
865  if ((i + 4) < width) out[i + 4] = 0;
866  if ((i + 5) < width) out[i + 5] = 0;
867  if ((i + 6) < width) out[i + 6] = 0;
868  if ((i + 7) < width) out[i + 7] = 0;
869 }
870 
871 
872 template<typename Pixel>
874  : pixelOps(pixelOps_)
875 {
876 }
877 
878 template<typename Pixel>
880  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
881 {
882  size_t i = 0, j = 0;
883  for (/* */; i < (width - 8); i += 9, j += 4) {
884  out[i + 0] = in[j + 0];
885  out[i + 1] = in[j + 0];
886  out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 0]);
887  out[i + 3] = in[j + 1];
888  out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 1]);
889  out[i + 5] = in[j + 2];
890  out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 2]);
891  out[i + 7] = in[j + 3];
892  out[i + 8] = in[j + 3];
893  }
894  if ((i + 0) < width) out[i + 0] = 0;
895  if ((i + 1) < width) out[i + 1] = 0;
896  if ((i + 2) < width) out[i + 2] = 0;
897  if ((i + 3) < width) out[i + 3] = 0;
898  if ((i + 4) < width) out[i + 4] = 0;
899  if ((i + 5) < width) out[i + 5] = 0;
900  if ((i + 6) < width) out[i + 6] = 0;
901  if ((i + 7) < width) out[i + 7] = 0;
902 }
903 
904 
905 template<typename Pixel>
907  : pixelOps(pixelOps_)
908 {
909 }
910 
911 template<typename Pixel>
913  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
914 {
915  size_t i = 0, j = 0;
916  for (/* */; i < (width - 8); i += 9, j += 8) {
917  out[i + 0] = in[j + 0];
918  out[i + 1] = pixelOps.template blend2<1, 7>(&in[j + 0]);
919  out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 1]);
920  out[i + 3] = pixelOps.template blend2<3, 5>(&in[j + 2]);
921  out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 3]);
922  out[i + 5] = pixelOps.template blend2<5, 3>(&in[j + 4]);
923  out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 5]);
924  out[i + 7] = pixelOps.template blend2<7, 1>(&in[j + 6]);
925  out[i + 8] = in[j + 7];
926  }
927  if ((i + 0) < width) out[i + 0] = 0;
928  if ((i + 1) < width) out[i + 1] = 0;
929  if ((i + 2) < width) out[i + 2] = 0;
930  if ((i + 3) < width) out[i + 3] = 0;
931  if ((i + 4) < width) out[i + 4] = 0;
932  if ((i + 5) < width) out[i + 5] = 0;
933  if ((i + 6) < width) out[i + 6] = 0;
934  if ((i + 7) < width) out[i + 7] = 0;
935 }
936 
937 
938 template<typename Pixel>
940  : pixelOps(pixelOps_)
941 {
942 }
943 
944 template<typename Pixel>
946  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
947 {
948  assert((width % 5) == 0);
949  for (size_t i = 0, j = 0; i < width; i += 5, j += 4) {
950  out[i + 0] = in[j + 0];
951  out[i + 1] = pixelOps.template blend2<1, 3>(&in[j + 0]);
952  out[i + 2] = pixelOps.template blend2<1, 1>(&in[j + 1]);
953  out[i + 3] = pixelOps.template blend2<3, 1>(&in[j + 2]);
954  out[i + 4] = in[j + 3];
955  }
956 }
957 
958 
959 template<typename Pixel>
961  : pixelOps(pixelOps_)
962 {
963 }
964 
965 template<typename Pixel>
967  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
968 {
969  assert((width % 8) == 0);
970  for (size_t i = 0, j = 0; i < width; i += 8, j += 7) {
971  out[i + 0] = in[j + 0];
972  out[i + 1] = pixelOps.template blend2<1, 6>(&in[j + 0]);
973  out[i + 2] = pixelOps.template blend2<2, 5>(&in[j + 1]);
974  out[i + 3] = pixelOps.template blend2<3, 4>(&in[j + 2]);
975  out[i + 4] = pixelOps.template blend2<4, 3>(&in[j + 3]);
976  out[i + 5] = pixelOps.template blend2<5, 2>(&in[j + 4]);
977  out[i + 6] = pixelOps.template blend2<6, 1>(&in[j + 5]);
978  out[i + 7] = in[j + 6];
979  }
980 }
981 
982 
983 template<typename Pixel>
985  : pixelOps(pixelOps_)
986 {
987 }
988 
989 template<typename Pixel>
991  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
992 {
993  assert((width % 20) == 0);
994  for (size_t i = 0, j = 0; i < width; i += 20, j += 17) {
995  out[i + 0] = in[j + 0];
996  out[i + 1] = pixelOps.template blend2< 3, 14>(&in[j + 0]);
997  out[i + 2] = pixelOps.template blend2< 6, 11>(&in[j + 1]);
998  out[i + 3] = pixelOps.template blend2< 9, 8>(&in[j + 2]);
999  out[i + 4] = pixelOps.template blend2<12, 5>(&in[j + 3]);
1000  out[i + 5] = pixelOps.template blend2<15, 2>(&in[j + 4]);
1001  out[i + 6] = in[j + 5];
1002  out[i + 7] = pixelOps.template blend2< 1, 16>(&in[j + 5]);
1003  out[i + 8] = pixelOps.template blend2< 4, 13>(&in[j + 6]);
1004  out[i + 9] = pixelOps.template blend2< 7, 10>(&in[j + 7]);
1005  out[i + 10] = pixelOps.template blend2<10, 7>(&in[j + 8]);
1006  out[i + 11] = pixelOps.template blend2<13, 4>(&in[j + 9]);
1007  out[i + 12] = pixelOps.template blend2<16, 1>(&in[j + 10]);
1008  out[i + 13] = in[j + 11];
1009  out[i + 14] = pixelOps.template blend2< 2, 15>(&in[j + 11]);
1010  out[i + 15] = pixelOps.template blend2< 5, 12>(&in[j + 12]);
1011  out[i + 16] = pixelOps.template blend2< 8, 9>(&in[j + 13]);
1012  out[i + 17] = pixelOps.template blend2<11, 6>(&in[j + 14]);
1013  out[i + 18] = pixelOps.template blend2<14, 3>(&in[j + 15]);
1014  out[i + 19] = in[j + 16];
1015  }
1016 }
1017 
1018 
1019 template<typename Pixel>
1021  : pixelOps(pixelOps_)
1022 {
1023 }
1024 
1025 template<typename Pixel>
1027  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
1028 {
1029  assert((width % 10) == 0);
1030  for (size_t i = 0, j = 0; i < width; i += 10, j += 9) {
1031  out[i + 0] = in[j + 0];
1032  out[i + 1] = pixelOps.template blend2<1, 8>(&in[j + 0]);
1033  out[i + 2] = pixelOps.template blend2<2, 7>(&in[j + 1]);
1034  out[i + 3] = pixelOps.template blend2<3, 6>(&in[j + 2]);
1035  out[i + 4] = pixelOps.template blend2<4, 5>(&in[j + 3]);
1036  out[i + 5] = pixelOps.template blend2<5, 4>(&in[j + 4]);
1037  out[i + 6] = pixelOps.template blend2<6, 3>(&in[j + 5]);
1038  out[i + 7] = pixelOps.template blend2<7, 2>(&in[j + 6]);
1039  out[i + 8] = pixelOps.template blend2<8, 1>(&in[j + 7]);
1040  out[i + 9] = in[j + 8];
1041  }
1042 }
1043 
1044 
1045 template<typename Pixel, unsigned w1, unsigned w2>
1047  : pixelOps(pixelOps_)
1048 {
1049 }
1050 
1051 template<typename Pixel, unsigned w1, unsigned w2>
1053  const Pixel* in1, const Pixel* in2, Pixel* out, size_t width)
1054 {
1055  // It _IS_ allowed that the output is the same as one of the inputs.
1056  // TODO SSE optimizations
1057  // pure C++ version
1058  for (auto i : xrange(width)) {
1059  out[i] = pixelOps.template blend<w1, w2>(in1[i], in2[i]);
1060  }
1061 }
1062 
1063 
1064 template<typename Pixel>
1066  : pixelOps(pixelOps_)
1067 {
1068 }
1069 
1070 template<typename Pixel>
1072  const Pixel* in, unsigned inWidth,
1073  Pixel* out, unsigned outWidth) const
1074 {
1075  constexpr unsigned FACTOR = 256;
1076 
1077  unsigned step = FACTOR * inWidth / outWidth;
1078  unsigned i = 0 * FACTOR;
1079  for (auto o : xrange(outWidth)) {
1080  Pixel p0 = in[(i / FACTOR) + 0];
1081  Pixel p1 = in[(i / FACTOR) + 1];
1082  out[o] = pixelOps.lerp(p0, p1, i % FACTOR);
1083  i += step;
1084  }
1085 }
1086 
1087 
1088 template<typename Pixel>
1090  : pixelOps(pixelOps_)
1091 {
1092 }
1093 
1094 template<typename Pixel>
1096  const Pixel* in1, const Pixel* in2, Pixel* out, size_t width)
1097 {
1098  // It _IS_ allowed that the output is the same as one of the inputs.
1099  for (auto i : xrange(width)) {
1100  out[i] = pixelOps.alphaBlend(in1[i], in2[i]);
1101  }
1102 }
1103 
1104 template<typename Pixel>
1106  Pixel in1, const Pixel* in2, Pixel* out, size_t width)
1107 {
1108  // It _IS_ allowed that the output is the same as the input.
1109 
1110  // ATM this routine is only called when 'in1' is not fully opaque nor
1111  // fully transparent. This cannot happen in 16bpp modes.
1112  assert(sizeof(Pixel) == 4);
1113 
1114  unsigned alpha = pixelOps.alpha(in1);
1115 
1116  // When one of the two colors is loop-invariant, using the
1117  // pre-multiplied-alpha-blending equation is a tiny bit more efficient
1118  // than using alphaBlend() or even lerp().
1119  // for (auto i : xrange(width)) {
1120  // out[i] = pixelOps.lerp(in1, in2[i], alpha);
1121  // }
1122  Pixel in1M = pixelOps.multiply(in1, alpha);
1123  unsigned alpha2 = 256 - alpha;
1124  for (auto i : xrange(width)) {
1125  out[i] = in1M + pixelOps.multiply(in2[i], alpha2);
1126  }
1127 }
1128 
1129 } // namespace openmsx
1130 
1131 #endif
TclObject t
AlphaBlendLines functor Generate an output line that is a per-pixel-alpha-blend of the two input line...
Definition: LineScalers.hh:259
AlphaBlendLines(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in1, const Pixel *in2, Pixel *out, size_t width)
BlendLines functor Generate an output line that is an interpolation of two input lines.
Definition: LineScalers.hh:227
BlendLines(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in1, const Pixel *in2, Pixel *out, size_t width)
Polymorphic line scaler.
Definition: LineScalers.hh:285
virtual bool isCopy() const =0
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
virtual void operator()(const Pixel *in, Pixel *out, size_t outWidth)=0
Actually scale a line.
Like PolyScale above, but instead keeps a reference to the actual scaler.
Definition: LineScalers.hh:340
bool isCopy() const override
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
Definition: LineScalers.hh:350
void operator()(const Pixel *in, Pixel *out, size_t outWidth) override
Actually scale a line.
Definition: LineScalers.hh:346
PolyScaleRef(Scaler &scaler_)
Definition: LineScalers.hh:342
Polymorphic wrapper around another line scaler.
Definition: LineScalers.hh:313
PolyScale(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:319
void operator()(const Pixel *in, Pixel *out, size_t outWidth) override
Actually scale a line.
Definition: LineScalers.hh:323
bool isCopy() const override
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
Definition: LineScalers.hh:327
Scale_17on20(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:984
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:990
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:519
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:455
Scale_XonY functors: transform an input line of pixels to an output line (possibly) with a different width.
Definition: LineScalers.hh:36
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:378
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:384
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:390
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:638
Scale_2on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:541
Scale_2on3(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:777
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:783
Scale_2on9(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:840
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:846
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:698
Scale_3on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:692
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:714
Scale_3on2(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:708
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:733
Scale_3on4(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:727
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:756
Scale_3on8(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:750
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:682
Scale_4on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:676
Scale_4on3(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:798
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:804
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:945
Scale_4on5(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:939
Scale_4on9(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:873
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:879
Scale_6on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:660
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:666
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:966
Scale_7on8(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:960
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:825
Scale_8on3(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:819
Scale_8on9(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:906
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:912
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_9on10(PixelOperations< Pixel > pixelOps)
Abstract base class for scalers.
Definition: Scaler.hh:14
Stretch (or zoom) a given input line to a wider output line.
Definition: LineScalers.hh:240
void operator()(const Pixel *in, unsigned inWidth, Pixel *out, unsigned outWidth) const
ZoomLine(PixelOperations< Pixel > pixelOps)
constexpr auto step
Definition: eeprom.cc:9
imat3 l3(ivec3(0, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
#define likely(x)
Definition: likely.hh:14
This file implements 3 utility functions:
Definition: Autofire.cc:5
constexpr unsigned N
Definition: ResampleHQ.cc:229
uint32_t Pixel
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:1414
constexpr nibble mask[4][13]
Definition: RP5C01.cc:33
size_t size(std::string_view utf8)
#define UNREACHABLE
Definition: unreachable.hh:38
constexpr auto xrange(T e)
Definition: xrange.hh:155
auto end(const zstring_view &x)
Definition: zstring_view.hh:83