openMSX
LineScalers.hh
Go to the documentation of this file.
1 #ifndef LINESCALERS_HH
2 #define LINESCALERS_HH
3 
4 #include "PixelOperations.hh"
5 #include "likely.hh"
6 #include <type_traits>
7 #include <cstring>
8 #include <cassert>
9 #ifdef __SSE2__
10 #include "emmintrin.h"
11 #endif
12 #ifdef __SSSE3__
13 #include "tmmintrin.h"
14 #endif
15 
16 namespace openmsx {
17 
18 // Tag classes
/** Marker base class. In this file Scale_1on1 derives from it. */
struct TagCopy
{
};

/** Compile-time predicate: true when CLASS derives from (is tagged with) TAG. */
template <typename CLASS, typename TAG>
struct IsTagged : std::is_base_of<TAG, CLASS>
{
};
22 
23 
24 // Scalers
25 
/** Line scaler that maps every input pixel onto 3 output pixels. */
template <typename Pixel>
class Scale_1on3
{
public:
    /** Fill 'width' pixels of 'out' from the pixels in 'in'. */
    void operator()(const Pixel* in, Pixel* out, size_t width);
};
38 
/** Line scaler that maps every input pixel onto 4 output pixels. */
template <typename Pixel>
class Scale_1on4
{
public:
    /** Fill 'width' pixels of 'out' from the pixels in 'in'. */
    void operator()(const Pixel* in, Pixel* out, size_t width);
};
44 
/** Line scaler that maps every input pixel onto 6 output pixels. */
template <typename Pixel>
class Scale_1on6
{
public:
    /** Fill 'width' pixels of 'out' from the pixels in 'in'. */
    void operator()(const Pixel* in, Pixel* out, size_t width);
};
50 
/** Line scaler that outputs every input pixel twice. */
template <typename Pixel>
class Scale_1on2
{
public:
    /** Fill 'width' pixels of 'out' from the pixels in 'in'. */
    void operator()(const Pixel* in, Pixel* out, size_t width);
};
56 
57 template <typename Pixel> class Scale_1on1 : public TagCopy
58 {
59 public:
60  void operator()(const Pixel* in, Pixel* out, size_t width);
61 };
62 
63 template <typename Pixel> class Scale_2on1
64 {
65 public:
66  explicit Scale_2on1(PixelOperations<Pixel> pixelOps);
67  void operator()(const Pixel* in, Pixel* out, size_t width);
68 private:
69  PixelOperations<Pixel> pixelOps;
70 };
71 
72 template <typename Pixel> class Scale_6on1
73 {
74 public:
75  explicit Scale_6on1(PixelOperations<Pixel> pixelOps);
76  void operator()(const Pixel* in, Pixel* out, size_t width);
77 private:
78  PixelOperations<Pixel> pixelOps;
79 };
80 
81 template <typename Pixel> class Scale_4on1
82 {
83 public:
84  explicit Scale_4on1(PixelOperations<Pixel> pixelOps);
85  void operator()(const Pixel* in, Pixel* out, size_t width);
86 private:
87  PixelOperations<Pixel> pixelOps;
88 };
89 
90 template <typename Pixel> class Scale_3on1
91 {
92 public:
93  explicit Scale_3on1(PixelOperations<Pixel> pixelOps);
94  void operator()(const Pixel* in, Pixel* out, size_t width);
95 private:
96  PixelOperations<Pixel> pixelOps;
97 };
98 
99 template <typename Pixel> class Scale_3on2
100 {
101 public:
102  explicit Scale_3on2(PixelOperations<Pixel> pixelOps);
103  void operator()(const Pixel* in, Pixel* out, size_t width);
104 private:
105  PixelOperations<Pixel> pixelOps;
106 };
107 
108 template <typename Pixel> class Scale_3on4
109 {
110 public:
111  explicit Scale_3on4(PixelOperations<Pixel> pixelOps);
112  void operator()(const Pixel* in, Pixel* out, size_t width);
113 private:
114  PixelOperations<Pixel> pixelOps;
115 };
116 
117 template <typename Pixel> class Scale_3on8
118 {
119 public:
120  explicit Scale_3on8(PixelOperations<Pixel> pixelOps);
121  void operator()(const Pixel* in, Pixel* out, size_t width);
122 private:
123  PixelOperations<Pixel> pixelOps;
124 };
125 
126 template <typename Pixel> class Scale_2on3
127 {
128 public:
129  explicit Scale_2on3(PixelOperations<Pixel> pixelOps);
130  void operator()(const Pixel* in, Pixel* out, size_t width);
131 private:
132  PixelOperations<Pixel> pixelOps;
133 };
134 
135 template <typename Pixel> class Scale_4on3
136 {
137 public:
138  explicit Scale_4on3(PixelOperations<Pixel> pixelOps);
139  void operator()(const Pixel* in, Pixel* out, size_t width);
140 private:
141  PixelOperations<Pixel> pixelOps;
142 };
143 
144 template <typename Pixel> class Scale_8on3
145 {
146 public:
147  explicit Scale_8on3(PixelOperations<Pixel> pixelOps);
148  void operator()(const Pixel* in, Pixel* out, size_t width);
149 private:
150  PixelOperations<Pixel> pixelOps;
151 };
152 
153 template <typename Pixel> class Scale_2on9
154 {
155 public:
156  explicit Scale_2on9(PixelOperations<Pixel> pixelOps);
157  void operator()(const Pixel* in, Pixel* out, size_t width);
158 private:
159  PixelOperations<Pixel> pixelOps;
160 };
161 
162 template <typename Pixel> class Scale_4on9
163 {
164 public:
165  explicit Scale_4on9(PixelOperations<Pixel> pixelOps);
166  void operator()(const Pixel* in, Pixel* out, size_t width);
167 private:
168  PixelOperations<Pixel> pixelOps;
169 };
170 
171 template <typename Pixel> class Scale_8on9
172 {
173 public:
174  explicit Scale_8on9(PixelOperations<Pixel> pixelOps);
175  void operator()(const Pixel* in, Pixel* out, size_t width);
176 private:
177  PixelOperations<Pixel> pixelOps;
178 };
179 
180 template <typename Pixel> class Scale_4on5
181 {
182 public:
183  explicit Scale_4on5(PixelOperations<Pixel> pixelOps);
184  void operator()(const Pixel* in, Pixel* out, size_t width);
185 private:
186  PixelOperations<Pixel> pixelOps;
187 };
188 
189 template <typename Pixel> class Scale_7on8
190 {
191 public:
192  explicit Scale_7on8(PixelOperations<Pixel> pixelOps);
193  void operator()(const Pixel* in, Pixel* out, size_t width);
194 private:
195  PixelOperations<Pixel> pixelOps;
196 };
197 
198 template <typename Pixel> class Scale_17on20
199 {
200 public:
201  explicit Scale_17on20(PixelOperations<Pixel> pixelOps);
202  void operator()(const Pixel* in, Pixel* out, size_t width);
203 private:
204  PixelOperations<Pixel> pixelOps;
205 };
206 
207 template <typename Pixel> class Scale_9on10
208 {
209 public:
210  explicit Scale_9on10(PixelOperations<Pixel> pixelOps);
211  void operator()(const Pixel* in, Pixel* out, size_t width);
212 private:
213  PixelOperations<Pixel> pixelOps;
214 };
215 
216 
224 template <typename Pixel, unsigned w1 = 1, unsigned w2 = 1> class BlendLines
225 {
226 public:
227  explicit BlendLines(PixelOperations<Pixel> pixelOps);
228  void operator()(const Pixel* in1, const Pixel* in2,
229  Pixel* out, size_t width);
230 private:
231  PixelOperations<Pixel> pixelOps;
232 };
233 
236 template<typename Pixel>
237 class ZoomLine
238 {
239 public:
240  explicit ZoomLine(PixelOperations<Pixel> pixelOps);
241  void operator()(const Pixel* in, unsigned inWidth,
242  Pixel* out, unsigned outWidth) const;
243 private:
244  PixelOperations<Pixel> pixelOps;
245 };
246 
247 
256 template <typename Pixel> class AlphaBlendLines
257 {
258 public:
259  explicit AlphaBlendLines(PixelOperations<Pixel> pixelOps);
260  void operator()(const Pixel* in1, const Pixel* in2,
261  Pixel* out, size_t width);
262  void operator()(Pixel in1, const Pixel* in2,
263  Pixel* out, size_t width);
264 private:
265  PixelOperations<Pixel> pixelOps;
266 };
267 
268 
/** Polymorphic line scaler.
  * Abstract interface through which any line scaler can be invoked with a
  * virtual call. (The 'class PolyLineScaler' line was missing from the
  * listing and is restored here.)
  */
template<typename Pixel>
class PolyLineScaler
{
public:
    /** Actually scale a line.
      * @param in       Input pixels.
      * @param out      Buffer receiving the scaled pixels.
      * @param outWidth Number of pixels to produce in 'out'.
      */
    virtual void operator()(const Pixel* in, Pixel* out, size_t outWidth) = 0;

    /** Is this scale operation actually a copy? */
    virtual bool isCopy() const = 0;

protected:
    // Protected and non-virtual: objects cannot be destroyed through a
    // pointer/reference to this interface.
    ~PolyLineScaler() = default;
};
305 
309 template<typename Pixel, typename Scaler>
310 class PolyScale final : public PolyLineScaler<Pixel>
311 {
312 public:
314  : scaler()
315  {
316  }
318  : scaler(pixelOps)
319  {
320  }
321  void operator()(const Pixel* in, Pixel* out, size_t outWidth) override
322  {
323  scaler(in, out, outWidth);
324  }
325  bool isCopy() const override
326  {
328  }
329 private:
330  Scaler scaler;
331 };
332 
336 template<typename Pixel, typename Scaler>
337 class PolyScaleRef final : public PolyLineScaler<Pixel>
338 {
339 public:
340  explicit PolyScaleRef(Scaler& scaler_)
341  : scaler(scaler_)
342  {
343  }
344  void operator()(const Pixel* in, Pixel* out, size_t outWidth) override
345  {
346  scaler(in, out, outWidth);
347  }
348  bool isCopy() const override
349  {
351  }
352 private:
353  Scaler& scaler;
354 };
355 
356 
357 // implementation
358 
/** Replicate every input pixel N times.
  * @param in    Input pixels; floor(width / N) of them are read.
  * @param out   Output buffer of 'width' pixels.
  * @param width Number of output pixels. Pixels past the last complete
  *              group of N are set to 0.
  */
template <typename Pixel, unsigned N>
static inline void scale_1onN(
    const Pixel* __restrict in, Pixel* __restrict out, size_t width)
{
    size_t i = 0, j = 0;
    // Test 'i + N <= width' rather than 'i < width - (N - 1)': the
    // subtraction is unsigned and wraps around when width < N - 1, which
    // made the loop run (and write) far past the end of 'out'.
    for (/* */; (i + N) <= width; i += N, ++j) {
        const Pixel pix = in[j];
        for (unsigned k = 0; k < N; ++k) {
            out[i + k] = pix;
        }
    }
    // Fewer than N pixels remain: blank them.
    for (/* */; i < width; ++i) {
        out[i] = 0;
    }
}
374 
375 template <typename Pixel>
376 void Scale_1on3<Pixel>::operator()(const Pixel* in, Pixel* out, size_t width)
377 {
378  scale_1onN<Pixel, 3>(in, out, width);
379 }
380 
381 template <typename Pixel>
382 void Scale_1on4<Pixel>::operator()(const Pixel* in, Pixel* out, size_t width)
383 {
384  scale_1onN<Pixel, 4>(in, out, width);
385 }
386 
387 template <typename Pixel>
388 void Scale_1on6<Pixel>::operator()(const Pixel* in, Pixel* out, size_t width)
389 {
390  scale_1onN<Pixel, 6>(in, out, width);
391 }
392 
393 #ifdef __SSE2__
394 template<typename Pixel> static inline __m128i unpacklo(__m128i x, __m128i y)
395 {
396  if (sizeof(Pixel) == 4) {
397  return _mm_unpacklo_epi32(x, y);
398  } else if (sizeof(Pixel) == 2) {
399  return _mm_unpacklo_epi16(x, y);
400  } else {
401  UNREACHABLE;
402  }
403 }
404 template<typename Pixel> static inline __m128i unpackhi(__m128i x, __m128i y)
405 {
406  if (sizeof(Pixel) == 4) {
407  return _mm_unpackhi_epi32(x, y);
408  } else if (sizeof(Pixel) == 2) {
409  return _mm_unpackhi_epi16(x, y);
410  } else {
411  UNREACHABLE;
412  }
413 }
414 
415 template<typename Pixel>
416 static inline void scale_1on2_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth)
417 {
418  size_t bytes = srcWidth * sizeof(Pixel);
419  assert((bytes % (4 * sizeof(__m128i))) == 0);
420  assert(bytes != 0);
421 
422  auto* in = reinterpret_cast<const char*>(in_) + bytes;
423  auto* out = reinterpret_cast< char*>(out_) + 2 * bytes;
424 
425  auto x = -ptrdiff_t(bytes);
426  do {
427  __m128i a0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 0));
428  __m128i a1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 16));
429  __m128i a2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 32));
430  __m128i a3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + x + 48));
431  __m128i l0 = unpacklo<Pixel>(a0, a0);
432  __m128i h0 = unpackhi<Pixel>(a0, a0);
433  __m128i l1 = unpacklo<Pixel>(a1, a1);
434  __m128i h1 = unpackhi<Pixel>(a1, a1);
435  __m128i l2 = unpacklo<Pixel>(a2, a2);
436  __m128i h2 = unpackhi<Pixel>(a2, a2);
437  __m128i l3 = unpacklo<Pixel>(a3, a3);
438  __m128i h3 = unpackhi<Pixel>(a3, a3);
439  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 0), l0);
440  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 16), h0);
441  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 32), l1);
442  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 48), h1);
443  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 64), l2);
444  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 80), h2);
445  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 96), l3);
446  _mm_storeu_si128(reinterpret_cast<__m128i*>(out + 2*x + 112), h3);
447  x += 4 * sizeof(__m128i);
448  } while (x < 0);
449 }
450 #endif
451 
452 template <typename Pixel>
454  const Pixel* __restrict in, Pixel* __restrict out, size_t dstWidth)
455 {
456  // This is a fairly simple algorithm (output each input pixel twice).
457  // An ideal compiler should generate optimal (vector) code for it.
458  // I checked the 2013-05-29 dev snapshots of gcc-4.9 and clang-3.4:
459  // - Clang is not able to vectorize this loop. My best tuned C version
460  // of this routine is a little over 4x slower than the tuned
461  // SSE-intrinsics version.
462  // - Gcc can auto-vectorize this routine. Though my best tuned version
463  // (I mean tuned to further improve the auto-vectorization, including
464  // using the new __builtin_assume_aligned() instrinsic) still runs
465  // approx 40% slower than the intrinsics version.
466  // Hopefully in some years the compilers have improved further so that
467  // the instrinsic version is no longer needed.
468  size_t srcWidth = dstWidth / 2;
469 
470 #ifdef __SSE2__
471  size_t chunk = 4 * sizeof(__m128i) / sizeof(Pixel);
472  size_t srcWidth2 = srcWidth & ~(chunk - 1);
473  scale_1on2_SSE(in, out, srcWidth2);
474  in += srcWidth2;
475  out += 2 * srcWidth2;
476  srcWidth -= srcWidth2;
477 #endif
478 
479  // C++ version. Used both on non-x86 machines and (possibly) on x86 for
480  // the last few pixels of the line.
481  for (size_t x = 0; x < srcWidth; ++x) {
482  out[x * 2] = out[x * 2 + 1] = in[x];
483  }
484 }
485 
486 #ifdef __SSE2__
// Specialized memcpy. It can beat a generic memcpy because of two
// restrictions that the caller must respect:
// - Source and destination are both aligned on 16 bytes.
// - The size is a (non-zero) integer multiple of 128 bytes.
static inline void memcpy_SSE_128(
    const void* __restrict in_, void* __restrict out_, size_t size)
{
    assert((reinterpret_cast<size_t>(in_ ) % 16) == 0);
    assert((reinterpret_cast<size_t>(out_) % 16) == 0);
    assert((size % 128) == 0);
    assert(size != 0);

    const __m128i* src = reinterpret_cast<const __m128i*>(in_);
    __m128i* dst = reinterpret_cast<__m128i*>(out_);
    const __m128i* const srcEnd = src + (size / sizeof(__m128i));
    do {
        // One iteration moves 8 registers = 128 bytes.
        for (int k = 0; k < 8; ++k) {
            dst[k] = src[k];
        }
        src += 8;
        dst += 8;
    } while (src != srcEnd);
}
514 #endif
515 
516 template <typename Pixel>
518  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
519 {
520  size_t nBytes = width * sizeof(Pixel);
521 
522 #ifdef __SSE2__
523  // When using a very recent gcc/clang, this routine is only about
524  // 10% faster than a simple memcpy(). When using gcc-4.6 (still the
525  // default on many systems), it's still about 66% faster.
526  size_t n128 = nBytes & ~127;
527  memcpy_SSE_128(in, out, n128); // copy 128 byte chunks
528  nBytes &= 127; // remaning bytes (if any)
529  if (likely(nBytes == 0)) return;
530  in += n128 / sizeof(Pixel);
531  out += n128 / sizeof(Pixel);
532 #endif
533 
534  memcpy(out, in, nBytes);
535 }
536 
537 
538 template <typename Pixel>
540  : pixelOps(pixelOps_)
541 {
542 }
543 
544 #ifdef __SSE2__
/** Integer-typed wrapper around _mm_shuffle_ps: selects two 32-bit lanes
  * from x (result lanes 0-1) and two from y (result lanes 2-3), as encoded
  * in IMM8.
  */
template<int IMM8> static inline __m128i shuffle(__m128i x, __m128i y)
{
    const __m128 xf = _mm_castsi128_ps(x);
    const __m128 yf = _mm_castsi128_ps(y);
    return _mm_castps_si128(_mm_shuffle_ps(xf, yf, IMM8));
}
550 
551 template<typename Pixel>
552 static inline __m128i blend(__m128i x, __m128i y, Pixel mask)
553 {
554  if (sizeof(Pixel) == 4) {
555  // 32bpp
556  __m128i p = shuffle<0x88>(x, y);
557  __m128i q = shuffle<0xDD>(x, y);
558  return _mm_avg_epu8(p, q);
559  } else {
560  // 16bpp, first shuffle odd/even pixels in the right position
561 #ifdef __SSSE3__
562  // This can be done faster using SSSE3
563  const __m128i LL = _mm_set_epi8(
564  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
565  0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00);
566  const __m128i HL = _mm_set_epi8(
567  0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00,
568  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
569  const __m128i LH = _mm_set_epi8(
570  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
571  0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02);
572  const __m128i HH = _mm_set_epi8(
573  0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02,
574  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
575  __m128i ll = _mm_shuffle_epi8(x, LL);
576  __m128i hl = _mm_shuffle_epi8(y, HL);
577  __m128i lh = _mm_shuffle_epi8(x, LH);
578  __m128i hh = _mm_shuffle_epi8(y, HH);
579  __m128i p = _mm_or_si128(ll, hl);
580  __m128i q = _mm_or_si128(lh, hh);
581 #else
582  // For SSE2 this only generates 1 instruction more, but with
583  // longer dependency chains
584  __m128i s = _mm_unpacklo_epi16(x, y);
585  __m128i t = _mm_unpackhi_epi16(x, y);
586  __m128i u = _mm_unpacklo_epi16(s, t);
587  __m128i v = _mm_unpackhi_epi16(s, t);
588  __m128i p = _mm_unpacklo_epi16(u, v);
589  __m128i q = _mm_unpackhi_epi16(u, v);
590 #endif
591  // Actually blend: (p & q) + (((p ^ q) & mask) >> 1)
592  __m128i m = _mm_set1_epi16(mask);
593  __m128i a = _mm_and_si128(p, q);
594  __m128i b = _mm_xor_si128(p, q);
595  __m128i c = _mm_and_si128(b, m);
596  __m128i d = _mm_srli_epi16(c, 1);
597  return _mm_add_epi16(a, d);
598  }
599 }
600 
/** SSE2 kernel: blend every 2 input pixels into 1 output pixel.
  * @param in_      Input pixels, 2 * dstBytes bytes (unaligned loads).
  * @param out_     Output buffer, dstBytes bytes (unaligned stores).
  * @param dstBytes Output size in bytes; a non-zero multiple of 64.
  * @param mask     Passed on to blend().
  */
template<typename Pixel>
static inline void scale_2on1_SSE(
    const Pixel* __restrict in_, Pixel* __restrict out_, size_t dstBytes,
    Pixel mask)
{
    assert((dstBytes % (4 * sizeof(__m128i))) == 0);
    assert(dstBytes != 0);

    // Both cursors point at the END of their buffers; a negative byte
    // offset then counts up towards zero.
    const char* const srcEnd = reinterpret_cast<const char*>(in_) + 2 * dstBytes;
    char* const dstEnd = reinterpret_cast<char*>(out_) + dstBytes;

    auto load = [](const char* p) {
        return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    };
    auto store = [](char* p, __m128i v) {
        _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
    };

    auto offset = -ptrdiff_t(dstBytes);
    do {
        const char* src = srcEnd + 2 * offset;
        char* dst = dstEnd + offset;

        // 128 input bytes ...
        __m128i src0 = load(src + 0);
        __m128i src1 = load(src + 16);
        __m128i src2 = load(src + 32);
        __m128i src3 = load(src + 48);
        __m128i src4 = load(src + 64);
        __m128i src5 = load(src + 80);
        __m128i src6 = load(src + 96);
        __m128i src7 = load(src + 112);
        // ... are blended pairwise ...
        __m128i res0 = blend(src0, src1, mask);
        __m128i res1 = blend(src2, src3, mask);
        __m128i res2 = blend(src4, src5, mask);
        __m128i res3 = blend(src6, src7, mask);
        // ... into 64 output bytes.
        store(dst + 0, res0);
        store(dst + 16, res1);
        store(dst + 32, res2);
        store(dst + 48, res3);

        offset += 4 * sizeof(__m128i);
    } while (offset < 0);
}
633 #endif
634 
635 template <typename Pixel>
637  const Pixel* __restrict in, Pixel* __restrict out, size_t dstWidth)
638 {
639 #ifdef __SSE2__
640  size_t n64 = (dstWidth * sizeof(Pixel)) & ~63;
641  Pixel mask = pixelOps.getBlendMask();
642  scale_2on1_SSE(in, out, n64, mask); // process 64 byte chunks
643  dstWidth &= ((64 / sizeof(Pixel)) - 1); // remaning pixels (if any)
644  if (likely(dstWidth == 0)) return;
645  in += (2 * n64) / sizeof(Pixel);
646  out += n64 / sizeof(Pixel);
647 #endif
648 
649  // pure C++ version
650  for (size_t i = 0; i < dstWidth; ++i) {
651  out[i] = pixelOps.template blend<1, 1>(
652  in[2 * i + 0], in[2 * i + 1]);
653  }
654 }
655 
656 
657 template <typename Pixel>
659  : pixelOps(pixelOps_)
660 {
661 }
662 
663 template <typename Pixel>
665  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
666 {
667  for (size_t i = 0; i < width; ++i) {
668  out[i] = pixelOps.template blend6<1, 1, 1, 1, 1, 1>(&in[6 * i]);
669  }
670 }
671 
672 
673 template <typename Pixel>
675  : pixelOps(pixelOps_)
676 {
677 }
678 
679 template <typename Pixel>
681  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
682 {
683  for (size_t i = 0; i < width; ++i) {
684  out[i] = pixelOps.template blend4<1, 1, 1, 1>(&in[4 * i]);
685  }
686 }
687 
688 
689 template <typename Pixel>
691  : pixelOps(pixelOps_)
692 {
693 }
694 
695 template <typename Pixel>
697  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
698 {
699  for (size_t i = 0; i < width; ++i) {
700  out[i] = pixelOps.template blend3<1, 1, 1>(&in[3 * i]);
701  }
702 }
703 
704 
705 template <typename Pixel>
707  : pixelOps(pixelOps_)
708 {
709 }
710 
711 template <typename Pixel>
713  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
714 {
715  size_t i = 0, j = 0;
716  for (/* */; i < (width - 1); i += 2, j += 3) {
717  out[i + 0] = pixelOps.template blend2<2, 1>(&in[j + 0]);
718  out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 1]);
719  }
720  if (i < width) out[i] = 0;
721 }
722 
723 
724 template <typename Pixel>
726  : pixelOps(pixelOps_)
727 {
728 }
729 
730 template <typename Pixel>
732  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
733 {
734  size_t i = 0, j = 0;
735  for (/* */; i < (width - 3); i += 4, j += 3) {
736  out[i + 0] = in[j + 0];
737  out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 0]);
738  out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 1]);
739  out[i + 3] = in[j + 2];
740  }
741  for (size_t k = 0; k < (4 - 1); ++k) {
742  if ((i + k) < width) out[i + k] = 0;
743  }
744 }
745 
746 
747 template <typename Pixel>
749  : pixelOps(pixelOps_)
750 {
751 }
752 
753 template <typename Pixel>
755  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
756 {
757  size_t i = 0, j = 0;
758  for (/* */; i < (width - 7); i += 8, j += 3) {
759  out[i + 0] = in[j + 0];
760  out[i + 1] = in[j + 0];
761  out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 0]);
762  out[i + 3] = in[j + 1];
763  out[i + 4] = in[j + 1];
764  out[i + 5] = pixelOps.template blend2<1, 2>(&in[j + 1]);
765  out[i + 6] = in[j + 2];
766  out[i + 7] = in[j + 2];
767  }
768  for (size_t k = 0; k < (8 - 1); ++k) {
769  if ((i + k) < width) out[i + k] = 0;
770  }
771 }
772 
773 
774 template <typename Pixel>
776  : pixelOps(pixelOps_)
777 {
778 }
779 
780 template <typename Pixel>
782  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
783 {
784  size_t i = 0, j = 0;
785  for (/* */; i < (width - 2); i += 3, j += 2) {
786  out[i + 0] = in[j + 0];
787  out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 0]);
788  out[i + 2] = in[j + 1];
789  }
790  if ((i + 0) < width) out[i + 0] = 0;
791  if ((i + 1) < width) out[i + 1] = 0;
792 }
793 
794 
795 template <typename Pixel>
797  : pixelOps(pixelOps_)
798 {
799 }
800 
801 template <typename Pixel>
803  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
804 {
805  size_t i = 0, j = 0;
806  for (/* */; i < (width - 2); i += 3, j += 4) {
807  out[i + 0] = pixelOps.template blend2<3, 1>(&in[j + 0]);
808  out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 1]);
809  out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 2]);
810  }
811  if ((i + 0) < width) out[i + 0] = 0;
812  if ((i + 1) < width) out[i + 1] = 0;
813 }
814 
815 
816 template <typename Pixel>
818  : pixelOps(pixelOps_)
819 {
820 }
821 
822 template <typename Pixel>
824  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
825 {
826  size_t i = 0, j = 0;
827  for (/* */; i < (width - 2); i += 3, j += 8) {
828  out[i + 0] = pixelOps.template blend3<3, 3, 2> (&in[j + 0]);
829  out[i + 1] = pixelOps.template blend4<1, 3, 3, 1>(&in[j + 2]);
830  out[i + 2] = pixelOps.template blend3<2, 3, 3> (&in[j + 5]);
831  }
832  if ((i + 0) < width) out[i + 0] = 0;
833  if ((i + 1) < width) out[i + 1] = 0;
834 }
835 
836 
837 template <typename Pixel>
839  : pixelOps(pixelOps_)
840 {
841 }
842 
843 template <typename Pixel>
845  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
846 {
847  size_t i = 0, j = 0;
848  for (/* */; i < (width - 8); i += 9, j += 2) {
849  out[i + 0] = in[j + 0];
850  out[i + 1] = in[j + 0];
851  out[i + 2] = in[j + 0];
852  out[i + 3] = in[j + 0];
853  out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 0]);
854  out[i + 5] = in[j + 1];
855  out[i + 6] = in[j + 1];
856  out[i + 7] = in[j + 1];
857  out[i + 8] = in[j + 1];
858  }
859  if ((i + 0) < width) out[i + 0] = 0;
860  if ((i + 1) < width) out[i + 1] = 0;
861  if ((i + 2) < width) out[i + 2] = 0;
862  if ((i + 3) < width) out[i + 3] = 0;
863  if ((i + 4) < width) out[i + 4] = 0;
864  if ((i + 5) < width) out[i + 5] = 0;
865  if ((i + 6) < width) out[i + 6] = 0;
866  if ((i + 7) < width) out[i + 7] = 0;
867 }
868 
869 
870 template <typename Pixel>
872  : pixelOps(pixelOps_)
873 {
874 }
875 
876 template <typename Pixel>
878  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
879 {
880  size_t i = 0, j = 0;
881  for (/* */; i < (width - 8); i += 9, j += 4) {
882  out[i + 0] = in[j + 0];
883  out[i + 1] = in[j + 0];
884  out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 0]);
885  out[i + 3] = in[j + 1];
886  out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 1]);
887  out[i + 5] = in[j + 2];
888  out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 2]);
889  out[i + 7] = in[j + 3];
890  out[i + 8] = in[j + 3];
891  }
892  if ((i + 0) < width) out[i + 0] = 0;
893  if ((i + 1) < width) out[i + 1] = 0;
894  if ((i + 2) < width) out[i + 2] = 0;
895  if ((i + 3) < width) out[i + 3] = 0;
896  if ((i + 4) < width) out[i + 4] = 0;
897  if ((i + 5) < width) out[i + 5] = 0;
898  if ((i + 6) < width) out[i + 6] = 0;
899  if ((i + 7) < width) out[i + 7] = 0;
900 }
901 
902 
903 template <typename Pixel>
905  : pixelOps(pixelOps_)
906 {
907 }
908 
909 template <typename Pixel>
911  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
912 {
913  size_t i = 0, j = 0;
914  for (/* */; i < width; i += 9, j += 8) {
915  out[i + 0] = in[j + 0];
916  out[i + 1] = pixelOps.template blend2<1, 7>(&in[j + 0]);
917  out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 1]);
918  out[i + 3] = pixelOps.template blend2<3, 5>(&in[j + 2]);
919  out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 3]);
920  out[i + 5] = pixelOps.template blend2<5, 3>(&in[j + 4]);
921  out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 5]);
922  out[i + 7] = pixelOps.template blend2<7, 1>(&in[j + 6]);
923  out[i + 8] = in[j + 7];
924  }
925  if ((i + 0) < width) out[i + 0] = 0;
926  if ((i + 1) < width) out[i + 1] = 0;
927  if ((i + 2) < width) out[i + 2] = 0;
928  if ((i + 3) < width) out[i + 3] = 0;
929  if ((i + 4) < width) out[i + 4] = 0;
930  if ((i + 5) < width) out[i + 5] = 0;
931  if ((i + 6) < width) out[i + 6] = 0;
932  if ((i + 7) < width) out[i + 7] = 0;
933 }
934 
935 
936 template <typename Pixel>
938  : pixelOps(pixelOps_)
939 {
940 }
941 
942 template <typename Pixel>
944  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
945 {
946  assert((width % 5) == 0);
947  for (size_t i = 0, j = 0; i < width; i += 5, j += 4) {
948  out[i + 0] = in[j + 0];
949  out[i + 1] = pixelOps.template blend2<1, 3>(&in[j + 0]);
950  out[i + 2] = pixelOps.template blend2<1, 1>(&in[j + 1]);
951  out[i + 3] = pixelOps.template blend2<3, 1>(&in[j + 2]);
952  out[i + 4] = in[j + 3];
953  }
954 }
955 
956 
957 template <typename Pixel>
959  : pixelOps(pixelOps_)
960 {
961 }
962 
963 template <typename Pixel>
965  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
966 {
967  assert((width % 8) == 0);
968  for (size_t i = 0, j = 0; i < width; i += 8, j += 7) {
969  out[i + 0] = in[j + 0];
970  out[i + 1] = pixelOps.template blend2<1, 6>(&in[j + 0]);
971  out[i + 2] = pixelOps.template blend2<2, 5>(&in[j + 1]);
972  out[i + 3] = pixelOps.template blend2<3, 4>(&in[j + 2]);
973  out[i + 4] = pixelOps.template blend2<4, 3>(&in[j + 3]);
974  out[i + 5] = pixelOps.template blend2<5, 2>(&in[j + 4]);
975  out[i + 6] = pixelOps.template blend2<6, 1>(&in[j + 5]);
976  out[i + 7] = in[j + 6];
977  }
978 }
979 
980 
981 template <typename Pixel>
983  : pixelOps(pixelOps_)
984 {
985 }
986 
987 template <typename Pixel>
989  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
990 {
991  assert((width % 20) == 0);
992  for (size_t i = 0, j = 0; i < width; i += 20, j += 17) {
993  out[i + 0] = in[j + 0];
994  out[i + 1] = pixelOps.template blend2< 3, 14>(&in[j + 0]);
995  out[i + 2] = pixelOps.template blend2< 6, 11>(&in[j + 1]);
996  out[i + 3] = pixelOps.template blend2< 9, 8>(&in[j + 2]);
997  out[i + 4] = pixelOps.template blend2<12, 5>(&in[j + 3]);
998  out[i + 5] = pixelOps.template blend2<15, 2>(&in[j + 4]);
999  out[i + 6] = in[j + 5];
1000  out[i + 7] = pixelOps.template blend2< 1, 16>(&in[j + 5]);
1001  out[i + 8] = pixelOps.template blend2< 4, 13>(&in[j + 6]);
1002  out[i + 9] = pixelOps.template blend2< 7, 10>(&in[j + 7]);
1003  out[i + 10] = pixelOps.template blend2<10, 7>(&in[j + 8]);
1004  out[i + 11] = pixelOps.template blend2<13, 4>(&in[j + 9]);
1005  out[i + 12] = pixelOps.template blend2<16, 1>(&in[j + 10]);
1006  out[i + 13] = in[j + 11];
1007  out[i + 14] = pixelOps.template blend2< 2, 15>(&in[j + 11]);
1008  out[i + 15] = pixelOps.template blend2< 5, 12>(&in[j + 12]);
1009  out[i + 16] = pixelOps.template blend2< 8, 9>(&in[j + 13]);
1010  out[i + 17] = pixelOps.template blend2<11, 6>(&in[j + 14]);
1011  out[i + 18] = pixelOps.template blend2<14, 3>(&in[j + 15]);
1012  out[i + 19] = in[j + 16];
1013  }
1014 }
1015 
1016 
1017 template <typename Pixel>
1019  : pixelOps(pixelOps_)
1020 {
1021 }
1022 
1023 template <typename Pixel>
1025  const Pixel* __restrict in, Pixel* __restrict out, size_t width)
1026 {
1027  assert((width % 10) == 0);
1028  for (size_t i = 0, j = 0; i < width; i += 10, j += 9) {
1029  out[i + 0] = in[j + 0];
1030  out[i + 1] = pixelOps.template blend2<1, 8>(&in[j + 0]);
1031  out[i + 2] = pixelOps.template blend2<2, 7>(&in[j + 1]);
1032  out[i + 3] = pixelOps.template blend2<3, 6>(&in[j + 2]);
1033  out[i + 4] = pixelOps.template blend2<4, 5>(&in[j + 3]);
1034  out[i + 5] = pixelOps.template blend2<5, 4>(&in[j + 4]);
1035  out[i + 6] = pixelOps.template blend2<6, 3>(&in[j + 5]);
1036  out[i + 7] = pixelOps.template blend2<7, 2>(&in[j + 6]);
1037  out[i + 8] = pixelOps.template blend2<8, 1>(&in[j + 7]);
1038  out[i + 9] = in[j + 8];
1039  }
1040 }
1041 
1042 
1043 template <typename Pixel, unsigned w1, unsigned w2>
1045  : pixelOps(pixelOps_)
1046 {
1047 }
1048 
1049 template <typename Pixel, unsigned w1, unsigned w2>
1051  const Pixel* in1, const Pixel* in2, Pixel* out, size_t width)
1052 {
1053  // It _IS_ allowed that the output is the same as one of the inputs.
1054  // TODO SSE optimizations
1055  // pure C++ version
1056  for (size_t i = 0; i < width; ++i) {
1057  out[i] = pixelOps.template blend<w1, w2>(in1[i], in2[i]);
1058  }
1059 }
1060 
1061 
1062 template<typename Pixel>
1064  : pixelOps(pixelOps_)
1065 {
1066 }
1067 
1068 template<typename Pixel>
1070  const Pixel* in, unsigned inWidth,
1071  Pixel* out, unsigned outWidth) const
1072 {
1073  static const unsigned FACTOR = 256;
1074 
1075  unsigned step = FACTOR * inWidth / outWidth;
1076  unsigned i = 0 * FACTOR;
1077  for (unsigned o = 0; o < outWidth; ++o) {
1078  Pixel p0 = in[(i / FACTOR) + 0];
1079  Pixel p1 = in[(i / FACTOR) + 1];
1080  out[o] = pixelOps.lerp(p0, p1, i % FACTOR);
1081  i += step;
1082  }
1083 }
1084 
1085 
1086 template <typename Pixel>
1088  : pixelOps(pixelOps_)
1089 {
1090 }
1091 
1092 template <typename Pixel>
1094  const Pixel* in1, const Pixel* in2, Pixel* out, size_t width)
1095 {
1096  // It _IS_ allowed that the output is the same as one of the inputs.
1097  for (size_t i = 0; i < width; ++i) {
1098  out[i] = pixelOps.alphaBlend(in1[i], in2[i]);
1099  }
1100 }
1101 
1102 template <typename Pixel>
1104  Pixel in1, const Pixel* in2, Pixel* out, size_t width)
1105 {
1106  // It _IS_ allowed that the output is the same as the input.
1107 
1108  // ATM this routine is only called when 'in1' is not fully opaque nor
1109  // fully transparent. This cannot happen in 16bpp modes.
1110  assert(sizeof(Pixel) == 4);
1111 
1112  unsigned alpha = pixelOps.alpha(in1);
1113 
1114  // When one of the two colors is loop-invariant, using the
1115  // pre-multiplied-alpha-blending equation is a tiny bit more efficient
1116  // than using alphaBlend() or even lerp().
1117  // for (size_t i = 0; i < width; ++i) {
1118  // out[i] = pixelOps.lerp(in1, in2[i], alpha);
1119  // }
1120  Pixel in1M = pixelOps.multiply(in1, alpha);
1121  unsigned alpha2 = 256 - alpha;
1122  for (size_t i = 0; i < width; ++i) {
1123  out[i] = in1M + pixelOps.multiply(in2[i], alpha2);
1124  }
1125 }
1126 
1127 } // namespace openmsx
1128 
1129 #endif
BlendLines functor Generate an output line that is an interpolation of two input lines.
Definition: LineScalers.hh:224
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:877
void operator()(const Pixel *in, Pixel *out, size_t outWidth) override
Actually scale a line.
Definition: LineScalers.hh:321
Scale_3on2(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:706
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:664
Abstract base class for scalers.
Scale_4on9(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:871
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:636
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:680
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:731
PolyScaleRef(Scaler &scaler_)
Definition: LineScalers.hh:340
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:964
Stretch (or zoom) a given input line to a wider output line.
Definition: LineScalers.hh:237
Scale_3on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:690
Scale_17on20(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:982
Scale_7on8(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:958
Polymorphic wrapper around another line scaler.
Definition: LineScalers.hh:310
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:754
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:910
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:802
Polymorphic line scaler.
Definition: LineScalers.hh:282
uint32_t Pixel
PolyScale(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:317
ZoomLine(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, unsigned inWidth, Pixel *out, unsigned outWidth) const
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:517
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:943
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:453
bool isCopy() const override
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
Definition: LineScalers.hh:325
Scale_9on10(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:823
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_8on9(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:904
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:988
bool isCopy() const override
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
Definition: LineScalers.hh:348
Scale_2on9(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:838
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:376
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:844
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:712
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:781
Scale_XonY functors Transforms an input line of pixels to an output line (possibly) with a different w...
Definition: LineScalers.hh:33
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:388
Like PolyScale above, but instead keeps a reference to the actual scaler.
Definition: LineScalers.hh:337
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:696
Scale_6on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:658
#define likely(x)
Definition: likely.hh:14
void operator()(const Pixel *in, Pixel *out, size_t outWidth) override
Actually scale a line.
Definition: LineScalers.hh:344
Scale_4on3(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:796
imat3 l3(ivec3(0, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
BlendLines(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in1, const Pixel *in2, Pixel *out, size_t width)
Scale_3on8(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:748
Scale_3on4(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:725
Scale_8on3(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:817
Scale_4on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:674
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.hh:62
Scale_2on1(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:539
void operator()(const Pixel *in, Pixel *out, size_t width)
Definition: LineScalers.hh:382
void operator()(const Pixel *in1, const Pixel *in2, Pixel *out, size_t width)
TclObject t
Scale_4on5(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:937
AlphaBlendLines functor Generate an output line that is a per-pixel-alpha-blend of the two input line...
Definition: LineScalers.hh:256
Scale_2on3(PixelOperations< Pixel > pixelOps)
Definition: LineScalers.hh:775
auto end(const string_view &x)
Definition: string_view.hh:152
AlphaBlendLines(PixelOperations< Pixel > pixelOps)
#define UNREACHABLE
Definition: unreachable.hh:38