openMSX
Simple3xScaler.cc
Go to the documentation of this file.
1 #include "Simple3xScaler.hh"
3 #include "LineScalers.hh"
4 #include "RawFrame.hh"
5 #include "ScalerOutput.hh"
6 #include "RenderSettings.hh"
7 #include "Multiply32.hh"
8 #include "vla.hh"
9 #include <cstdint>
10 #include <memory>
11 #ifdef __SSE2__
12 #include <emmintrin.h>
13 #endif
14 
15 namespace openmsx {
16 
// Horizontal line scaler with blur: operator() writes 'dstWidth' output
// pixels from dstWidth / 3 input pixels, mixing each pixel with its left and
// right neighbour. The mixing strength is the value given to setBlur().
17 template <class Pixel> class Blur_1on3
18 {
19 public:
20  explicit Blur_1on3(const PixelOperations<Pixel>& pixelOps);
    // Sets the blur strength used by later operator() calls.
21  inline void setBlur(unsigned blur_) { blur = blur_; }
    // Scales (and blurs) one line; 'dstWidth' is the number of output pixels.
22  void operator()(const Pixel* in, Pixel* out, size_t dstWidth);
23 private:
    // Per-weight multipliers; operator() loads them with the weights c0..c3.
24  Multiply32<Pixel> mult0;
25  Multiply32<Pixel> mult1;
26  Multiply32<Pixel> mult2;
27  Multiply32<Pixel> mult3;
    // Blur strength. It is not part of the constructor's initializer list,
    // so setBlur() has to be called before operator() is used.
28  unsigned blur;
29 #ifdef __SSE2__
    // SSE2 code path, called by operator() when sizeof(Pixel) == 4.
30  void blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth);
31 #endif
32 };
33 
34 
// Constructor. 'pixelOps_' is handed to the Scaler3 base class, stored in
// 'pixelOps', and used to build both the 'scanline' helper and the
// heap-allocated Blur_1on3 line scaler. 'settings_' is stored in 'settings'.
35 template <class Pixel>
37  const PixelOperations<Pixel>& pixelOps_,
38  const RenderSettings& settings_)
39  : Scaler3<Pixel>(pixelOps_)
40  , pixelOps(pixelOps_)
41  , scanline(pixelOps_)
42  , blur_1on3(std::make_unique<Blur_1on3<Pixel>>(pixelOps_))
43  , settings(settings_)
44 {
45 }
46 
47 template <class Pixel>
49 
// Vertical 1 -> 3 scaling with scanlines. Every source line yields two
// identical, horizontally scaled output lines (y + 0 and y + 1). The third
// line (y + 2) is produced by scanline.draw() from that scaled line and the
// scaled version of the following source line.
// 'scale' is the horizontal line scaler (the callers in this file pass their
// 'op' object as last argument); 'buf' is scratch space for getLinePtr().
50 template <typename Pixel>
52  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
53  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
55 {
56  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
57  int scanlineFactor = settings.getScanlineFactor();
58  unsigned dstWidth = dst.getWidth();
59  unsigned y = dstStartY;
    // First source line: scale it into line y, then duplicate into y + 1.
60  auto* srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
61  auto* dstLine0 = dst.acquireLine(y + 0);
62  scale(srcLine, dstLine0, dstWidth);
63 
65  auto* dstLine1 = dst.acquireLine(y + 1);
66  copy(dstLine0, dstLine1, dstWidth);
67 
    // Each iteration scales the next source line into lines y + 3 / y + 4,
    // uses it to finish line y + 2 of the current group, releases the
    // current group and then makes y + 3 / y + 4 the new current pair.
68  for (/* */; (y + 4) < dstEndY; y += 3, srcStartY += 1) {
69  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
70  auto* dstLine3 = dst.acquireLine(y + 3);
71  scale(srcLine, dstLine3, dstWidth);
72 
73  auto* dstLine4 = dst.acquireLine(y + 4);
74  copy(dstLine3, dstLine4, dstWidth);
75 
76  auto* dstLine2 = dst.acquireLine(y + 2);
77  scanline.draw(dstLine0, dstLine3, dstLine2,
78  scanlineFactor, dstWidth);
79 
80  dst.releaseLine(y + 0, dstLine0);
81  dst.releaseLine(y + 1, dstLine1);
82  dst.releaseLine(y + 2, dstLine2);
83  dstLine0 = dstLine3;
84  dstLine1 = dstLine4;
85  }
    // Last group: the following source line is scaled into the temporary
    // 'buf2' only, because it is needed solely as second input for the
    // scanline line y + 2 and is not itself written to the output here.
86  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
87  VLA_SSE_ALIGNED(Pixel, buf2, dstWidth);
88  scale(srcLine, buf2, dstWidth);
89 
90  auto* dstLine2 = dst.acquireLine(y + 2);
91  scanline.draw(dstLine0, buf2, dstLine2, scanlineFactor, dstWidth);
92  dst.releaseLine(y + 0, dstLine0);
93  dst.releaseLine(y + 1, dstLine1);
94  dst.releaseLine(y + 2, dstLine2);
95 }
96 
// Vertical 2 -> 3 scaling with scanlines. Each pair of source lines
// (srcY, srcY + 1) produces three output lines: dstY + 0 and dstY + 2 are
// the horizontally scaled source lines, dstY + 1 is generated by
// scanline.draw() from those two.
// 'scale' is the horizontal line scaler; 'buf' is scratch space that
// getLinePtr() may use for the requested line.
97 template <typename Pixel>
99  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
100  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
101  PolyLineScaler<Pixel>& scale)
102 {
103  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
104  int scanlineFactor = settings.getScanlineFactor();
105  unsigned dstWidth = dst.getWidth();
106  for (unsigned srcY = srcStartY, dstY = dstStartY; dstY < dstEndY;
107  srcY += 2, dstY += 3) {
    // 'buf' is reused for both source lines: the first one has already
    // been consumed by scale() before the second one is fetched.
108  auto* srcLine0 = src.getLinePtr(srcY + 0, srcWidth, buf);
109  auto* dstLine0 = dst.acquireLine(dstY + 0);
110  scale(srcLine0, dstLine0, dstWidth);
111 
112  auto* srcLine1 = src.getLinePtr(srcY + 1, srcWidth, buf);
113  auto* dstLine2 = dst.acquireLine(dstY + 2);
114  scale(srcLine1, dstLine2, dstWidth);
115 
    // Middle line is derived from the two already scaled outer lines.
116  auto* dstLine1 = dst.acquireLine(dstY + 1);
117  scanline.draw(dstLine0, dstLine2, dstLine1,
118  scanlineFactor, dstWidth);
119 
120  dst.releaseLine(dstY + 0, dstLine0);
121  dst.releaseLine(dstY + 1, dstLine1);
122  dst.releaseLine(dstY + 2, dstLine2);
123  }
124 }
125 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale1() (1 -> 3 vertical scaling).
126 template <class Pixel>
128  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
129  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
130 {
132  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
133 }
134 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale2() (2 -> 3 vertical scaling).
135 template <class Pixel>
137  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
138  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
139 {
141  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
142 }
143 
// Calls doScale1() with either the blurring line scaler or a non-blurring
// one, depending on the blur setting.
144 template <class Pixel>
146  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
147  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
148 {
    // The blur setting is divided by 3 before use; a result of 0 selects
    // the non-blurring branch.
149  if (unsigned blur = settings.getBlurFactor() / 3) {
150  blur_1on3->setBlur(blur);
    // PolyScaleRef wraps the long-lived Blur_1on3 object by reference.
151  PolyScaleRef<Pixel, Blur_1on3<Pixel>> op(*blur_1on3);
152  doScale1(src, srcStartY, srcEndY, srcWidth,
153  dst, dstStartY, dstEndY, op);
154  } else {
155  // No blurring: this is an optimization but it's also needed
156  // for correctness (otherwise there's an overflow in 0.16 fixed
157  // point arithmetic).
159  doScale1(src, srcStartY, srcEndY, srcWidth,
160  dst, dstStartY, dstEndY, op);
161  }
162 }
163 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale2() (2 -> 3 vertical scaling).
164 template <class Pixel>
166  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
167  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
168 {
170  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
171 }
172 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale1() (1 -> 3 vertical scaling).
173 template <class Pixel>
175  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
176  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
177 {
179  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
180 }
181 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale2() (2 -> 3 vertical scaling).
182 template <class Pixel>
184  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
185  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
186 {
188  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
189 }
190 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale1() (1 -> 3 vertical scaling).
191 template <class Pixel>
193  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
194  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
195 {
197  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
198 }
199 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale2() (2 -> 3 vertical scaling).
200 template <class Pixel>
202  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
203  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
204 {
206  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
207 }
208 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale1() (1 -> 3 vertical scaling).
209 template <class Pixel>
211  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
212  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
213 {
215  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
216 }
217 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale2() (2 -> 3 vertical scaling).
218 template <class Pixel>
220  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
221  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
222 {
224  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
225 }
226 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale1() (1 -> 3 vertical scaling).
227 template <class Pixel>
229  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
230  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
231 {
233  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
234 }
235 
// Thin wrapper: forwards the source/destination ranges unchanged, together
// with the line scaler object 'op', to doScale2() (2 -> 3 vertical scaling).
236 template <class Pixel>
238  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
239  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
240 {
242  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
243 }
244 
// Scales single-colour source lines 1 -> 3 without touching pixel data:
// output lines dstY + 0 and dstY + 1 are filled with the source line's
// colour, dstY + 2 with that colour darkened by the scanline factor.
245 template <class Pixel>
247  FrameSource& src, unsigned srcStartY, unsigned srcEndY,
248  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
249 {
250  int scanlineFactor = settings.getScanlineFactor();
251 
252  unsigned dstHeight = dst.getHeight();
    // Unless the range ends at the bottom of the output, stop the fast
    // fill 3 output lines early; that last group is handled below.
253  unsigned stopDstY = (dstEndY == dstHeight)
254  ? dstEndY : dstEndY - 3;
255  unsigned srcY = srcStartY, dstY = dstStartY;
256  for (/* */; dstY < stopDstY; srcY += 1, dstY += 3) {
257  auto color0 = src.getLineColor<Pixel>(srcY);
258  Pixel color1 = scanline.darken(color0, scanlineFactor);
259  dst.fillLine(dstY + 0, color0);
260  dst.fillLine(dstY + 1, color0);
261  dst.fillLine(dstY + 2, color1);
262  }
    // Remaining group: hand it to the general scaler, using the width of
    // the line that follows. The asserts record the expectation that the
    // current line has width 1 while the next line does not.
263  if (dstY != dstHeight) {
264  unsigned nextLineWidth = src.getLineWidth(srcY + 1);
265  assert(src.getLineWidth(srcY) == 1);
266  assert(nextLineWidth != 1);
267  this->dispatchScale(src, srcY, srcEndY, nextLineWidth,
268  dst, dstY, dstEndY);
269  }
270 }
271 
// Scales single-colour source lines 2 -> 3: for each pair of source lines
// the outer output lines get the two line colours and the middle output
// line gets the colour computed by scanline.darken() from both of them.
272 template <class Pixel>
274  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/,
275  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
276 {
277  int scanlineFactor = settings.getScanlineFactor();
278  for (unsigned srcY = srcStartY, dstY = dstStartY;
279  dstY < dstEndY; srcY += 2, dstY += 3) {
280  auto color0 = src.getLineColor<Pixel>(srcY + 0);
281  auto color1 = src.getLineColor<Pixel>(srcY + 1);
282  Pixel color01 = scanline.darken(color0, color1, scanlineFactor);
283  dst.fillLine(dstY + 0, color0);
284  dst.fillLine(dstY + 1, color01);
285  dst.fillLine(dstY + 2, color1);
286  }
287 }
288 
289 
290 // class Blur_1on3
291 
// Constructor: builds the four multipliers from 'pixelOps'. Their factors
// are set later, on every operator() call. 'blur' is not initialised here.
292 template <class Pixel>
294  : mult0(pixelOps)
295  , mult1(pixelOps)
296  , mult2(pixelOps)
297  , mult3(pixelOps)
298 {
299 }
300 
301 #ifdef __SSE2__
// SSE2 implementation of the 1 -> 3 blur for 4-byte pixels.
// Reads 'srcWidth' pixels from 'in_' and writes 3 * srcWidth pixels to
// 'out_'. Requirements (checked with assert): srcWidth is a multiple of 4
// and at least 8, and both pointers are 16-byte aligned.
// The weights are 16-bit fixed-point values applied with _mm_mulhi_epu16,
// which keeps the high 16 bits of each 16x16-bit product.
302 template<class Pixel>
303 void Blur_1on3<Pixel>::blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth)
304 {
305  if (sizeof(Pixel) != 4) {
306  assert(false); return; // only 32-bpp
307  }
308 
309  assert((srcWidth % 4) == 0);
310  assert(srcWidth >= 8);
311  assert((size_t(in_ ) % 16) == 0);
312  assert((size_t(out_) % 16) == 0);
313 
    // Same weights as the scalar routine, scaled by 256 to 16-bit
    // fixed point. With blur == 0, c2 and c3 would be 0x10000, which does
    // not fit in 16 bits, so this routine must not be used with blur == 0.
314  unsigned alpha = blur * 256;
315  unsigned c0 = alpha / 2;
316  unsigned c1 = alpha + c0;
317  unsigned c2 = 0x10000 - c1;
318  unsigned c3 = 0x10000 - alpha;
    // Each constant holds one weight in its low 64 bits and another in its
    // high 64 bits; shuffle 0x4E swaps the two 64-bit halves.
319  __m128i C0C1 = _mm_set_epi16(c1, c1, c1, c1, c0, c0, c0, c0);
320  __m128i C1C0 = _mm_shuffle_epi32(C0C1, 0x4E);
321  __m128i C2C3 = _mm_set_epi16(c3, c3, c3, c3, c2, c2, c2, c2);
322  __m128i C3C2 = _mm_shuffle_epi32(C2C3, 0x4E);
323 
    // 'in' / 'out' point at the LAST group (4 input / 12 output pixels);
    // 'x' is a negative byte offset that counts up to 0. The loop handles
    // every group except the last one, which is done after the loop.
324  size_t tmp = srcWidth - 4;
325  auto* in = reinterpret_cast<const char*>(in_ + tmp);
326  auto* out = reinterpret_cast< char*>(out_ + 3 * tmp);
327  auto x = -ptrdiff_t(tmp * sizeof(Pixel));
328 
329  __m128i ZERO = _mm_setzero_si128();
330 
331  // Prepare first iteration (duplicate left border pixel)
    // Unpacking with ZERO widens the 8-bit colour components to 16 bits.
332  __m128i abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x));
333  __m128i a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
334  __m128i a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
335  __m128i a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
336  __m128i d1d0 = _mm_shuffle_epi32(a0a1, 0x4E); // left border
337 
338  // At the start of each iteration the following vars are live:
339  // abcd, a_b_, a_a_, a0a1, d1d0
340  // Each iteration reads 4 and produces 12 pixels.
    // NOTE(review): shuffle<0xE4>() is a helper defined outside the visible
    // source; from its use it presumably combines one 64-bit half of each
    // argument - confirm against its definition.
341  do {
342  // p01
343  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
344  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
345  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
346  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
347  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
348  // p23
349  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
350  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
351  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
352  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
353  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
    // Pack the 16-bit sums back to 8-bit components (unsigned saturation).
354  __m128i p0123 = _mm_packus_epi16(p01, p23);
355  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 0),
356  p0123);
357 
358  // p45
359  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
360  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
361  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
362  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
363  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
364  // p67
365  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
366  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
367  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
368  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
369  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
370  __m128i p4567 = _mm_packus_epi16(p45, p67);
371  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 16),
372  p4567);
373 
374  // p89
375  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
376  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
377  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
378  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
379  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
380  // pab
    // The last output pixel of this group needs the first pixel of the
    // next group, so the next 4 input pixels are loaded here already.
381  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
382  abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 16));
383  a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
384  a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
385  a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
386  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
387  __m128i p89ab = _mm_packus_epi16(p89, pab);
388  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 32),
389  p89ab);
390 
391  x += 16;
392  } while (x < 0);
393 
394  // Last iteration (duplicate right border pixel)
    // Same computation as the loop body with x == 0, except that no
    // further input is loaded: pixel d is reused as its own right neighbour.
395  // p01
396  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
397  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
398  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
399  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
400  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
401  // p23
402  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
403  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
404  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
405  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
406  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
407  __m128i p0123 = _mm_packus_epi16(p01, p23);
408  _mm_store_si128(reinterpret_cast<__m128i*>(out + 0),
409  p0123);
410 
411  // p45
412  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
413  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
414  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
415  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
416  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
417  // p67
418  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
419  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
420  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
421  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
422  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
423  __m128i p4567 = _mm_packus_epi16(p45, p67);
424  _mm_store_si128(reinterpret_cast<__m128i*>(out + 16),
425  p4567);
426 
427  // p89
428  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
429  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
430  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
431  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
432  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
433  // pab
434  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
435  a0a1 = _mm_shuffle_epi32(d1d0, 0x4E); // right border
436  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
437  __m128i p89ab = _mm_packus_epi16(p89, pab);
438  _mm_store_si128(reinterpret_cast<__m128i*>(out + 32),
439  p89ab);
440 }
441 #endif
442 
// Scales one line 1 -> 3 horizontally with blur: reads dstWidth / 3 pixels
// from 'in' and writes 3 output pixels per input pixel to 'out'.
// When built with SSE2 and sizeof(Pixel) == 4 the work is delegated to
// blur_SSE(); otherwise the portable routine below is used.
443 template <class Pixel>
445  const Pixel* __restrict in, Pixel* __restrict out,
446  size_t dstWidth)
447 {
448  /* The following code is equivalent to this loop. It is 2x unrolled
449  * and common subexpressions have been eliminated. The last iteration
450  * is also moved outside the for loop.
451  *
452  * unsigned c0 = blur / 2;
453  * unsigned c1 = c0 + blur;
454  * unsigned c2 = 256 - c1;
455  * unsigned c3 = 256 - 2 * c0;
456  * Pixel prev, curr, next;
457  * prev = curr = next = in[0];
458  * size_t srcWidth = dstWidth / 3;
459  * for (unsigned x = 0; x < srcWidth; ++x) {
460  * if (x != (srcWidth - 1)) next = in[x + 1];
461  * out[3 * x + 0] = mul(c1, prev) + mul(c2, curr);
462  * out[3 * x + 1] = mul(c0, prev) + mul(c3, curr) + mul(c0, next);
463  * out[3 * x + 2] = mul(c2, curr) + mul(c1, next);
464  * prev = curr;
465  * curr = next;
466  * }
467  */
468  size_t srcWidth = dstWidth / 3;
469 #ifdef __SSE2__
470  if (sizeof(Pixel) == 4) {
471  blur_SSE(in, out, srcWidth);
472  return;
473  }
474 #endif
475 
476  // C++ routine, both 16bpp and 32bpp
    // Weights out of 256: c1 + c2 == 256 and 2 * c0 + c3 == 256, so each
    // output pixel is a weighted average of its inputs.
477  unsigned c0 = blur / 2;
478  unsigned c1 = blur + c0;
479  unsigned c2 = 256 - c1;
480  unsigned c3 = 256 - 2 * c0;
481  mult0.setFactor32(c0);
482  mult1.setFactor32(c1);
483  mult2.setFactor32(c2);
484  mult3.setFactor32(c3);
485 
    // f0/f1 and g0/g1 carry the c0- and c1-weighted value of the previous
    // pixel for the first and second half of the unrolled loop body.
    // Both start out as in[0], which duplicates the left border pixel.
486  Pixel p0 = in[0];
487  Pixel p1;
488  uint32_t f0 = mult0.mul32(p0);
489  uint32_t f1 = mult1.mul32(p0);
490  uint32_t g0 = f0;
491  uint32_t g1 = f1;
492 
    // Two input pixels (six output pixels) per iteration. The bound
    // 'srcWidth - 2' is unsigned, so srcWidth must be at least 2; the
    // unrolling also presumes an even srcWidth - TODO confirm with callers.
493  size_t x;
494  for (x = 0; x < (srcWidth - 2); x += 2) {
495  uint32_t g2 = mult2.mul32(p0);
496  out[3 * x + 0] = mult0.conv32(g2 + f1);
497  p1 = in[x + 1];
498  uint32_t t0 = mult0.mul32(p1);
499  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
500  f0 = t0;
501  f1 = mult1.mul32(p1);
502  out[3 * x + 2] = mult0.conv32(g2 + f1);
503 
504  uint32_t f2 = mult2.mul32(p1);
505  out[3 * x + 3] = mult0.conv32(f2 + g1);
506  p0 = in[x + 2];
507  uint32_t t1 = mult0.mul32(p0);
508  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + t1);
509  g0 = t1;
510  g1 = mult1.mul32(p0);
511  out[3 * x + 5] = mult0.conv32(g1 + f2);
512  }
    // Final two input pixels, handled outside the loop so that in[x + 2]
    // is never read past the end of the line.
513  uint32_t g2 = mult2.mul32(p0);
514  out[3 * x + 0] = mult0.conv32(g2 + f1);
515  p1 = in[x + 1];
516  uint32_t t0 = mult0.mul32(p1);
517  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
518  f0 = t0;
519  f1 = mult1.mul32(p1);
520  out[3 * x + 2] = mult0.conv32(g2 + f1);
521 
    // Right border: the last pixel is its own right neighbour. Since
    // c1 + c2 == 256 the very last output pixel is simply p1.
522  uint32_t f2 = mult2.mul32(p1);
523  out[3 * x + 3] = mult0.conv32(f2 + g1);
524  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + f0);
525  out[3 * x + 5] = p1;
526 }
527 
// Entry point that scales a range of lines, optionally with a superimposed
// video frame. When 'superImpose' is non-null, 'src' is wrapped in a
// SuperImposedVideoFrame and the width reported by that wrapper for line
// srcStartY replaces the 'srcWidth' argument. Otherwise 'src' is dispatched
// directly.
528 template <class Pixel>
530  FrameSource& src, const RawFrame* superImpose,
531  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
532  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
533 {
534  if (superImpose) {
535  SuperImposedVideoFrame<Pixel> sf(src, *superImpose, pixelOps);
536  srcWidth = sf.getLineWidth(srcStartY);
537  this->dispatchScale(sf, srcStartY, srcEndY, srcWidth,
538  dst, dstStartY, dstEndY);
539  } else {
540  this->dispatchScale(src, srcStartY, srcEndY, srcWidth,
541  dst, dstStartY, dstEndY);
542  }
543 }
544 
545 // Force template instantiation.
546 #if HAVE_16BPP
547 template class Simple3xScaler<uint16_t>;
548 #endif
549 #if HAVE_32BPP
550 template class Simple3xScaler<uint32_t>;
551 #endif
552 
553 } // namespace openmsx
int getScanlineFactor() const
The alpha value [0..255] of the gap between scanlines.
auto copy(InputRange &&range, OutputIter out)
Definition: ranges.hh:149
This class represents a frame that is the (per-pixel) alpha-blend of a (laser-disc) video frame and a...
Helper class to perform 'pixel x scalar' calculations.
Definition: Multiply32.hh:14
Pixel getLineColor(unsigned line) const
Get the (single) color of the given line.
Definition: FrameSource.hh:74
void setBlur(unsigned blur_)
virtual void fillLine(unsigned y, Pixel color)=0
virtual Pixel * acquireLine(unsigned y)=0
Polymorphic wrapper around another line scaler.
Definition: LineScalers.hh:310
STL namespace.
Polymorphic line scaler.
Definition: LineScalers.hh:282
uint32_t Pixel
Interface for getting lines from a video frame.
Definition: FrameSource.hh:14
mat4 scale(const vec3 &xyz)
Definition: gl_transform.hh:19
virtual unsigned getHeight() const =0
A video frame as output by the VDP scanline conversion unit, before any postprocessing filters are ap...
Definition: RawFrame.hh:25
int getBlurFactor() const
The amount of horizontal blur [0..256].
mat23 p23(vec2(2, 3), vec2(4, 5), vec2(0, 7))
Blur_1on3(const PixelOperations< Pixel > &pixelOps)
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
virtual void releaseLine(unsigned y, Pixel *buf)=0
Like PolyScale above, but instead keeps a reference to the actual scaler.
Definition: LineScalers.hh:337
Class containing all settings for renderers.
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:91
void operator()(const Pixel *in, Pixel *out, size_t dstWidth)
unsigned getLineWidth(unsigned line) const override
Gets the number of display pixels on the given line.
virtual unsigned getLineWidth(unsigned line) const =0
Gets the number of display pixels on the given line.
void dispatchScale(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Definition: Scaler3.cc:223
virtual unsigned getWidth() const =0
Simple3xScaler(const PixelOperations< Pixel > &pixelOps, const RenderSettings &settings)
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
Base class for 3x scalers.
Definition: Scaler3.hh:11