openMSX
Simple3xScaler.cc
Go to the documentation of this file.
1 #include "Simple3xScaler.hh"
3 #include "LineScalers.hh"
4 #include "RawFrame.hh"
5 #include "ScalerOutput.hh"
6 #include "RenderSettings.hh"
7 #include "Multiply32.hh"
8 #include "vla.hh"
9 #include "memory.hh"
10 #include <cstdint>
11 #ifdef __SSE2__
12 #include <emmintrin.h>
13 #endif
14 
15 namespace openmsx {
16 
17 template <class Pixel> class Blur_1on3
18 {
19 public:
20  explicit Blur_1on3(const PixelOperations<Pixel>& pixelOps);
21  inline void setBlur(unsigned blur_) { blur = blur_; }
22  void operator()(const Pixel* in, Pixel* out, size_t dstWidth);
23 private:
24  Multiply32<Pixel> mult0;
25  Multiply32<Pixel> mult1;
26  Multiply32<Pixel> mult2;
27  Multiply32<Pixel> mult3;
28  unsigned blur;
29 #ifdef __SSE2__
30  void blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth);
31 #endif
32 };
33 
34 
// Constructor (presumably Simple3xScaler<Pixel>::Simple3xScaler; the line that
// carries the qualified name, listing line 36, is absent from this listing).
//
// pixelOps_ : forwarded to the Scaler3<Pixel> base, used to initialise the
//             `pixelOps` and `scanline` members, and passed to the
//             Blur_1on3<Pixel> object created through make_unique.
// settings_ : used to initialise the `settings` member, which is later
//             queried for the scanline and blur factors.
35 template <class Pixel>
37  const PixelOperations<Pixel>& pixelOps_,
38  const RenderSettings& settings_)
39  : Scaler3<Pixel>(pixelOps_)
40  , pixelOps(pixelOps_)
41  , scanline(pixelOps_)
42  , blur_1on3(make_unique<Blur_1on3<Pixel>>(pixelOps_))
43  , settings(settings_)
44 {
45 }
46 
47 template <class Pixel>
49 
// Vertical 1 -> 3 scaling (presumably Simple3xScaler<Pixel>::doScale1; listing
// lines 51 and 54, which hold the function name, the `src` parameter and the
// `scale` parameter, are absent from this listing).
//
// Every source line is scaled horizontally with `scale` into output line
// y + 0 and copied to y + 1. Output line y + 2 is produced by scanline.draw()
// from the scaled line above it and the scaled version of the next source
// line.
//
// srcStartY           : first source line to read.
// srcWidth            : width passed to src.getLinePtr().
// dst                 : output; lines are acquired, written and released here.
// dstStartY / dstEndY : output line range.
50 template <typename Pixel>
52  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
53  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
55 {
56  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
57  int scanlineFactor = settings.getScanlineFactor();
58  unsigned dstWidth = dst.getWidth();
59  unsigned y = dstStartY;
// Prime the pipeline: the first source line fills output lines y+0 and y+1.
// srcStartY is post-incremented, so from here on it names the NEXT source line.
60  auto* srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
61  auto* dstLine0 = dst.acquireLine(y + 0);
62  scale(srcLine, dstLine0, dstWidth);
63 
64  Scale_1on1<Pixel> copy;
65  auto* dstLine1 = dst.acquireLine(y + 1);
66  copy(dstLine0, dstLine1, dstWidth);
67 
// Each iteration scales the next source line straight into y+3 / y+4, uses it
// together with dstLine0 to draw the scanline at y+2, then releases y+0..y+2
// and carries y+3 / y+4 over as the new dstLine0 / dstLine1.
68  for (/* */; (y + 4) < dstEndY; y += 3, srcStartY += 1) {
69  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
70  auto* dstLine3 = dst.acquireLine(y + 3);
71  scale(srcLine, dstLine3, dstWidth);
72 
73  auto* dstLine4 = dst.acquireLine(y + 4);
74  copy(dstLine3, dstLine4, dstWidth);
75 
76  auto* dstLine2 = dst.acquireLine(y + 2);
77  scanline.draw(dstLine0, dstLine3, dstLine2,
78  scanlineFactor, dstWidth);
79 
80  dst.releaseLine(y + 0, dstLine0);
81  dst.releaseLine(y + 1, dstLine1);
82  dst.releaseLine(y + 2, dstLine2);
83  dstLine0 = dstLine3;
84  dstLine1 = dstLine4;
85  }
// Final group: the following source line is only needed as input for the last
// scanline, so it is scaled into a scratch buffer instead of an output line.
// NOTE(review): this reads one source line beyond the last one rendered here;
// srcEndY is unused, so whether that line is always valid cannot be seen from
// this function - confirm against the callers.
86  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
87  VLA_SSE_ALIGNED(Pixel, buf2, dstWidth);
88  scale(srcLine, buf2, dstWidth);
89 
90  auto* dstLine2 = dst.acquireLine(y + 2);
91  scanline.draw(dstLine0, buf2, dstLine2, scanlineFactor, dstWidth);
92  dst.releaseLine(y + 0, dstLine0);
93  dst.releaseLine(y + 1, dstLine1);
94  dst.releaseLine(y + 2, dstLine2);
95 }
96 
// Vertical 2 -> 3 scaling (presumably Simple3xScaler<Pixel>::doScale2; listing
// line 98, which holds the function name and the `src` parameter, is absent
// from this listing).
//
// Per iteration two source lines are scaled horizontally with `scale` into
// output lines dstY + 0 and dstY + 2; output line dstY + 1 is produced by
// scanline.draw() from those two lines.
//
// srcStartY           : first source line to read.
// srcWidth            : width passed to src.getLinePtr().
// dst                 : output; lines are acquired, written and released here.
// dstStartY / dstEndY : output line range, advanced 3 lines per iteration.
// scale               : horizontal line scaler.
97 template <typename Pixel>
99  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
100  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
101  PolyLineScaler<Pixel>& scale)
102 {
103  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
104  int scanlineFactor = settings.getScanlineFactor();
105  unsigned dstWidth = dst.getWidth();
106  for (unsigned srcY = srcStartY, dstY = dstStartY; dstY < dstEndY;
107  srcY += 2, dstY += 3) {
108  auto* srcLine0 = src.getLinePtr(srcY + 0, srcWidth, buf);
109  auto* dstLine0 = dst.acquireLine(dstY + 0);
110  scale(srcLine0, dstLine0, dstWidth);
111 
// srcLine0 has already been consumed by scale(), so the same scratch buffer
// can be offered to getLinePtr() again for the second source line.
112  auto* srcLine1 = src.getLinePtr(srcY + 1, srcWidth, buf);
113  auto* dstLine2 = dst.acquireLine(dstY + 2);
114  scale(srcLine1, dstLine2, dstWidth);
115 
116  auto* dstLine1 = dst.acquireLine(dstY + 1);
117  scanline.draw(dstLine0, dstLine2, dstLine1,
118  scanlineFactor, dstWidth);
119 
120  dst.releaseLine(dstY + 0, dstLine0);
121  dst.releaseLine(dstY + 1, dstLine1);
122  dst.releaseLine(dstY + 2, dstLine2);
123  }
124 }
125 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale1().
// NOTE(review): listing line 127 (function name and `src` parameter) and
// listing line 131 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
126 template <class Pixel>
128  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
129  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
130 {
132  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
133 }
134 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale2().
// NOTE(review): listing line 136 (function name and `src` parameter) and
// listing line 140 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
135 template <class Pixel>
137  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
138  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
139 {
141  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
142 }
143 
// Forwarder to doScale1() that chooses between a blurring and a plain line
// scaler. The blur strength is settings.getBlurFactor() / 3; when that is
// non-zero it is set on the shared Blur_1on3 object, which is wrapped in a
// PolyScaleRef, otherwise the scaler `op` of the else-branch is used.
// NOTE(review): listing line 145 (function name and `src` parameter) and
// listing line 158 (declaration of `op` in the else-branch) are absent from
// this listing.
144 template <class Pixel>
146  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
147  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
148 {
149  if (unsigned blur = settings.getBlurFactor() / 3) {
150  blur_1on3->setBlur(blur);
151  PolyScaleRef<Pixel, Blur_1on3<Pixel>> op(*blur_1on3);
152  doScale1(src, srcStartY, srcEndY, srcWidth,
153  dst, dstStartY, dstEndY, op);
154  } else {
155  // No blurring: this is an optimization but it's also needed
156  // for correctness (otherwise there's an overflow in 0.16 fixed
157  // point arithmetic).
159  doScale1(src, srcStartY, srcEndY, srcWidth,
160  dst, dstStartY, dstEndY, op);
161  }
162 }
163 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale2().
// NOTE(review): listing line 165 (function name and `src` parameter) and
// listing line 169 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
164 template <class Pixel>
166  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
167  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
168 {
170  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
171 }
172 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale1().
// NOTE(review): listing line 174 (function name and `src` parameter) and
// listing line 178 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
173 template <class Pixel>
175  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
176  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
177 {
179  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
180 }
181 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale2().
// NOTE(review): listing line 183 (function name and `src` parameter) and
// listing line 187 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
182 template <class Pixel>
184  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
185  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
186 {
188  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
189 }
190 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale1().
// NOTE(review): listing line 192 (function name and `src` parameter) and
// listing line 196 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
191 template <class Pixel>
193  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
194  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
195 {
197  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
198 }
199 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale2().
// NOTE(review): listing line 201 (function name and `src` parameter) and
// listing line 205 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
200 template <class Pixel>
202  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
203  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
204 {
206  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
207 }
208 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale1().
// NOTE(review): listing line 210 (function name and `src` parameter) and
// listing line 214 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
209 template <class Pixel>
211  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
212  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
213 {
215  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
216 }
217 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale2().
// NOTE(review): listing line 219 (function name and `src` parameter) and
// listing line 223 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
218 template <class Pixel>
220  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
221  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
222 {
224  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
225 }
226 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale1().
// NOTE(review): listing line 228 (function name and `src` parameter) and
// listing line 232 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
227 template <class Pixel>
229  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
230  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
231 {
233  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
234 }
235 
// Thin forwarder: passes its arguments plus a line scaler `op` on to doScale2().
// NOTE(review): listing line 237 (function name and `src` parameter) and
// listing line 241 (declaration of `op`) are absent from this listing, so the
// concrete line-scaler type cannot be read off here.
236 template <class Pixel>
238  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
239  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
240 {
242  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
243 }
244 
// Scales single-colour source lines 1 -> 3 (listing line 246, which holds the
// function name, is absent from this listing).
//
// For every source line its colour is fetched with getLineColor(); output
// lines dstY + 0 and dstY + 1 are filled with that colour and dstY + 2 with the
// colour returned by scanline.darken().
//
// Unless the range ends at the bottom of the output (dstEndY == dstHeight) the
// fill loop stops one group (3 output lines) early, and that last group is
// handed to dispatchScale() with the width of the following source line.
// NOTE(review): `dstEndY - 3` is unsigned and wraps when dstEndY < 3 -
// presumably callers never pass such a range; confirm.
245 template <class Pixel>
247  FrameSource& src, unsigned srcStartY, unsigned srcEndY,
248  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
249 {
250  int scanlineFactor = settings.getScanlineFactor();
251 
252  unsigned dstHeight = dst.getHeight();
253  unsigned stopDstY = (dstEndY == dstHeight)
254  ? dstEndY : dstEndY - 3;
255  unsigned srcY = srcStartY, dstY = dstStartY;
256  for (/* */; dstY < stopDstY; srcY += 1, dstY += 3) {
257  Pixel color0 = src.getLineColor<Pixel>(srcY);
258  Pixel color1 = scanline.darken(color0, scanlineFactor);
259  dst.fillLine(dstY + 0, color0);
260  dst.fillLine(dstY + 1, color0);
261  dst.fillLine(dstY + 2, color1);
262  }
263  if (dstY != dstHeight) {
// The remaining line is expected to have width 1 and to be followed by a line
// with a different width (both asserted); it is scaled as if it had the width
// of that next line.
264  unsigned nextLineWidth = src.getLineWidth(srcY + 1);
265  assert(src.getLineWidth(srcY) == 1);
266  assert(nextLineWidth != 1);
267  this->dispatchScale(src, srcY, srcEndY, nextLineWidth,
268  dst, dstY, dstEndY);
269  }
270 }
271 
// Scales single-colour source lines 2 -> 3 (listing line 273, which holds the
// function name, is absent from this listing).
//
// Per iteration the colours of two consecutive source lines are fetched;
// output line dstY + 0 is filled with the first colour, dstY + 2 with the
// second, and dstY + 1 with the result of scanline.darken() applied to both.
272 template <class Pixel>
274  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/,
275  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
276 {
277  int scanlineFactor = settings.getScanlineFactor();
278  for (unsigned srcY = srcStartY, dstY = dstStartY;
279  dstY < dstEndY; srcY += 2, dstY += 3) {
280  Pixel color0 = src.getLineColor<Pixel>(srcY + 0);
281  Pixel color1 = src.getLineColor<Pixel>(srcY + 1);
282  Pixel color01 = scanline.darken(color0, color1, scanlineFactor);
283  dst.fillLine(dstY + 0, color0);
284  dst.fillLine(dstY + 1, color01);
285  dst.fillLine(dstY + 2, color1);
286  }
287 }
288 
289 
290 // class Blur_1on3
291 
// Constructor (presumably Blur_1on3<Pixel>::Blur_1on3; listing line 293, which
// holds the qualified name and the `pixelOps` parameter, is absent from this
// listing). Initialises the four Multiply32 members from `pixelOps`.
// `blur` is not set in this initialiser list; it is assigned via setBlur().
292 template <class Pixel>
294  : mult0(pixelOps)
295  , mult1(pixelOps)
296  , mult2(pixelOps)
297  , mult3(pixelOps)
298 {
299 }
300 
301 #ifdef __SSE2__
// SSE2 implementation of the 1 -> 3 horizontal blur for 4-byte pixels.
//
// in_      : source line; must be 16-byte aligned (asserted).
// out_     : destination line; must be 16-byte aligned (asserted). 3 * srcWidth
//            pixels are written (12 output pixels per 4 input pixels).
// srcWidth : number of source pixels; must be a multiple of 4 and at least 8
//            (asserted).
//
// For any other pixel size the function only asserts and returns.
// The blend weights are 16-bit fractions: _mm_mulhi_epu16 keeps the high 16
// bits of each 16x16-bit product, and the colour bytes are zero-extended to
// 16 bits before multiplying.
// NOTE(review): `shuffle<...>` is not declared anywhere in this listing;
// presumably it comes from one of the included headers - confirm.
302 template<class Pixel>
303 void Blur_1on3<Pixel>::blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth)
304 {
305  if (sizeof(Pixel) != 4) {
306  assert(false); return; // only 32-bpp
307  }
308 
309  assert((srcWidth % 4) == 0);
310  assert(srcWidth >= 8);
311  assert((size_t(in_ ) % 16) == 0);
312  assert((size_t(out_) % 16) == 0);
313 
// Blend weights on a 0..0x10000 scale, replicated over four 16-bit lanes each
// (one lane per colour byte of a pixel). The CxCy / CyCx pairs hold the same
// two weights with their 64-bit halves swapped.
314  unsigned alpha = blur * 256;
315  unsigned c0 = alpha / 2;
316  unsigned c1 = alpha + c0;
317  unsigned c2 = 0x10000 - c1;
318  unsigned c3 = 0x10000 - alpha;
319  __m128i C0C1 = _mm_set_epi16(c1, c1, c1, c1, c0, c0, c0, c0);
320  __m128i C1C0 = _mm_shuffle_epi32(C0C1, 0x4E);
321  __m128i C2C3 = _mm_set_epi16(c3, c3, c3, c3, c2, c2, c2, c2);
322  __m128i C3C2 = _mm_shuffle_epi32(C2C3, 0x4E);
323 
// `in` / `out` point at the LAST group of 4 source / 12 destination pixels and
// x is a negative byte offset that counts up to 0. The loop therefore handles
// every group except the last one, which is processed after the loop.
324  size_t tmp = srcWidth - 4;
325  auto* in = reinterpret_cast<const char*>(in_ + tmp);
326  auto* out = reinterpret_cast< char*>(out_ + 3 * tmp);
327  auto x = -ptrdiff_t(tmp * sizeof(Pixel));
328 
329  __m128i ZERO = _mm_setzero_si128();
330 
331  // Prepare first iteration (duplicate left border pixel)
332  __m128i abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x));
333  __m128i a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
334  __m128i a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
335  __m128i a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
336  __m128i d1d0 = _mm_shuffle_epi32(a0a1, 0x4E); // left border
337 
338  // At the start of each iteration the following vars are live:
339  // abcd, a_b_, a_a_, a0a1, d1d0
340  // Each iteration reads 4 and produces 12 pixels.
341  do {
342  // p01
343  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
344  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
345  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
346  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
347  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
348  // p23
349  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
350  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
351  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
352  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
353  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
354  __m128i p0123 = _mm_packus_epi16(p01, p23);
355  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 0),
356  p0123);
357 
358  // p45
359  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
360  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
361  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
362  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
363  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
364  // p67
365  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
366  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
367  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
368  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
369  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
370  __m128i p4567 = _mm_packus_epi16(p45, p67);
371  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 16),
372  p4567);
373 
374  // p89
375  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
376  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
377  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
378  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
379  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
380  // pab
381  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
// Load the next group of 4 source pixels; its first pixel is the right
// neighbour needed for the last output pixel of this group.
382  abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 16));
383  a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
384  a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
385  a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
386  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
387  __m128i p89ab = _mm_packus_epi16(p89, pab);
388  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 32),
389  p89ab);
390 
391  x += 16;
392  } while (x < 0);
393 
394  // Last iteration (duplicate right border pixel)
395  // p01
396  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
397  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
398  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
399  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
400  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
401  // p23
402  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
403  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
404  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
405  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
406  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
407  __m128i p0123 = _mm_packus_epi16(p01, p23);
408  _mm_store_si128(reinterpret_cast<__m128i*>(out + 0),
409  p0123);
410 
411  // p45
412  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
413  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
414  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
415  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
416  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
417  // p67
418  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
419  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
420  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
421  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
422  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
423  __m128i p4567 = _mm_packus_epi16(p45, p67);
424  _mm_store_si128(reinterpret_cast<__m128i*>(out + 16),
425  p4567);
426 
427  // p89
428  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
429  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
430  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
431  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
432  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
433  // pab
434  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
435  a0a1 = _mm_shuffle_epi32(d1d0, 0x4E); // right border
436  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
437  __m128i p89ab = _mm_packus_epi16(p89, pab);
438  _mm_store_si128(reinterpret_cast<__m128i*>(out + 32),
439  p89ab);
440 }
441 #endif
442 
// Scales one line horizontally by a factor 3 while mixing each output pixel
// with its neighbours (presumably Blur_1on3<Pixel>::operator(); listing line
// 444, which holds the qualified function name, is absent from this listing).
//
// in       : source pixels; dstWidth / 3 of them are used.
// out      : destination pixels.
// dstWidth : width of the destination line in pixels.
//
// When built with SSE2 and Pixel is 4 bytes wide the work is delegated to
// blur_SSE(); otherwise the scalar code below runs.
// NOTE(review): srcWidth is a size_t, so `srcWidth - 2` in the loop condition
// wraps around when srcWidth < 2, and both the loop and the tail consume
// source pixels in pairs - presumably callers always pass an even
// srcWidth >= 2; confirm.
443 template <class Pixel>
445  const Pixel* __restrict in, Pixel* __restrict out,
446  size_t dstWidth)
447 {
448  /* The following code is equivalent to this loop. It is 2x unrolled
449  * and common subexpressions have been eliminated. The last iteration
450  * is also moved outside the for loop.
451  *
452  * unsigned c0 = blur / 2;
453  * unsigned c1 = c0 + blur;
454  * unsigned c2 = 256 - c1;
455  * unsigned c3 = 256 - 2 * c0;
456  * Pixel prev, curr, next;
457  * prev = curr = next = in[0];
458  * size_t srcWidth = dstWidth / 3;
459  * for (unsigned x = 0; x < srcWidth; ++x) {
460  * if (x != (srcWidth - 1)) next = in[x + 1];
461  * out[3 * x + 0] = mul(c1, prev) + mul(c2, curr);
462  * out[3 * x + 1] = mul(c0, prev) + mul(c3, curr) + mul(c0, next);
463  * out[3 * x + 2] = mul(c2, curr) + mul(c1, next);
464  * prev = curr;
465  * curr = next;
466  * }
467  */
468  size_t srcWidth = dstWidth / 3;
469 #ifdef __SSE2__
470  if (sizeof(Pixel) == 4) {
471  blur_SSE(in, out, srcWidth);
472  return;
473  }
474 #endif
475 
476  // C++ routine, both 16bpp and 32bpp
477  unsigned c0 = blur / 2;
478  unsigned c1 = blur + c0;
479  unsigned c2 = 256 - c1;
480  unsigned c3 = 256 - 2 * c0;
481  mult0.setFactor32(c0);
482  mult1.setFactor32(c1);
483  mult2.setFactor32(c2);
484  mult3.setFactor32(c3);
485 
// f0/f1 and g0/g1 carry the c0- and c1-weighted value of the previous pixel
// from one half of the unrolled loop body to the other. All four start from
// in[0], which duplicates the left border pixel.
486  Pixel p0 = in[0];
487  Pixel p1;
488  uint32_t f0 = mult0.mul32(p0);
489  uint32_t f1 = mult1.mul32(p0);
490  uint32_t g0 = f0;
491  uint32_t g1 = f1;
492 
493  size_t x;
494  for (x = 0; x < (srcWidth - 2); x += 2) {
495  uint32_t g2 = mult2.mul32(p0);
496  out[3 * x + 0] = mult0.conv32(g2 + f1);
497  p1 = in[x + 1];
498  uint32_t t0 = mult0.mul32(p1);
499  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
500  f0 = t0;
501  f1 = mult1.mul32(p1);
502  out[3 * x + 2] = mult0.conv32(g2 + f1);
503 
504  uint32_t f2 = mult2.mul32(p1);
505  out[3 * x + 3] = mult0.conv32(f2 + g1);
506  p0 = in[x + 2];
507  uint32_t t1 = mult0.mul32(p0);
508  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + t1);
509  g0 = t1;
510  g1 = mult1.mul32(p0);
511  out[3 * x + 5] = mult0.conv32(g1 + f2);
512  }
// Last pixel pair, handled outside the loop: the final pixel has no right
// neighbour, so it acts as its own neighbour. Hence out[3 * x + 4] reuses f0
// and out[3 * x + 5] is the unmodified pixel.
513  uint32_t g2 = mult2.mul32(p0);
514  out[3 * x + 0] = mult0.conv32(g2 + f1);
515  p1 = in[x + 1];
516  uint32_t t0 = mult0.mul32(p1);
517  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
518  f0 = t0;
519  f1 = mult1.mul32(p1);
520  out[3 * x + 2] = mult0.conv32(g2 + f1);
521 
522  uint32_t f2 = mult2.mul32(p1);
523  out[3 * x + 3] = mult0.conv32(f2 + g1);
524  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + f0);
525  out[3 * x + 5] = p1;
526 }
527 
// Entry point that optionally superimposes a second frame before scaling
// (listing line 529, which holds the function name, is absent from this
// listing).
//
// superImpose : when non-null, `src` and `*superImpose` are wrapped in a
//               SuperImposedVideoFrame, srcWidth is replaced by that wrapper's
//               width for line srcStartY, and the wrapper is scaled instead of
//               `src`. When null, `src` is scaled directly with the given
//               srcWidth.
// All other arguments are passed through to dispatchScale().
528 template <class Pixel>
530  FrameSource& src, const RawFrame* superImpose,
531  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
532  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
533 {
534  if (superImpose) {
535  SuperImposedVideoFrame<Pixel> sf(src, *superImpose, pixelOps);
536  srcWidth = sf.getLineWidth(srcStartY);
537  this->dispatchScale(sf, srcStartY, srcEndY, srcWidth,
538  dst, dstStartY, dstEndY);
539  } else {
540  this->dispatchScale(src, srcStartY, srcEndY, srcWidth,
541  dst, dstStartY, dstEndY);
542  }
543 }
544 
545 // Force template instantiation.
546 #if HAVE_16BPP
547 template class Simple3xScaler<uint16_t>;
548 #endif
549 #if HAVE_32BPP
550 template class Simple3xScaler<uint32_t>;
551 #endif
552 
553 } // namespace openmsx
int getScanlineFactor() const
The alpha value [0..255] of the gap between scanlines.
This class represents a frame that is the (per-pixel) alpha-blend of a (laser-disc) video frame and a...
Helper class to perform 'pixel x scalar' calculations.
Definition: Multiply32.hh:14
void setBlur(unsigned blur_)
virtual void fillLine(unsigned y, Pixel color)=0
Simple3xScaler(const PixelOperations< Pixel > &pixelOps, const RenderSettings &renderSettings)
virtual Pixel * acquireLine(unsigned y)=0
Polymorphic wrapper around another line scaler.
Definition: LineScalers.hh:310
Polymorphic line scaler.
Definition: LineScalers.hh:282
uint32_t Pixel
Interface for getting lines from a video frame.
Definition: FrameSource.hh:14
mat4 scale(const vec3 &xyz)
Definition: gl_transform.hh:19
virtual unsigned getHeight() const =0
A video frame as output by the VDP scanline conversion unit, before any postprocessing filters are ap...
Definition: RawFrame.hh:25
int getBlurFactor() const
The amount of horizontal blur [0..256].
Blur_1on3(const PixelOperations< Pixel > &pixelOps)
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
virtual void releaseLine(unsigned y, Pixel *buf)=0
Like PolyScale above, but instead keeps a reference to the actual scaler.
Definition: LineScalers.hh:337
Class containing all settings for renderers.
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:91
void operator()(const Pixel *in, Pixel *out, size_t dstWidth)
unsigned getLineWidth(unsigned line) const override
Gets the number of display pixels on the given line.
const Pixel getLineColor(unsigned line) const
Get the (single) color of the given line.
Definition: FrameSource.hh:74
virtual unsigned getLineWidth(unsigned line) const =0
Gets the number of display pixels on the given line.
std::unique_ptr< T > make_unique()
Definition: memory.hh:27
void dispatchScale(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Definition: Scaler3.cc:223
virtual unsigned getWidth() const =0
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
Base class for 3x scalers.
Definition: Scaler3.hh:11