openMSX
Simple3xScaler.cc
Go to the documentation of this file.
1 #include "Simple3xScaler.hh"
3 #include "LineScalers.hh"
4 #include "RawFrame.hh"
5 #include "ScalerOutput.hh"
6 #include "RenderSettings.hh"
7 #include "Multiply32.hh"
8 #include "vla.hh"
9 #include "memory.hh"
10 #include <cstdint>
11 #ifdef __SSE2__
12 #include <emmintrin.h>
13 #endif
14 
15 namespace openmsx {
16 
// Line scaler functor: operator() turns dstWidth / 3 input pixels into
// dstWidth output pixels, blending each pixel with its horizontal
// neighbours. The blend strength is set with setBlur().
17 template <class Pixel> class Blur_1on3
18 {
19 public:
20  explicit Blur_1on3(const PixelOperations<Pixel>& pixelOps);
    // Stores the blur strength used by subsequent operator() calls.
21  inline void setBlur(unsigned blur_) { blur = blur_; }
22  void operator()(const Pixel* in, Pixel* out, size_t dstWidth);
23 private:
    // One multiplier per blend weight (c0..c3 in operator()).
24  Multiply32<Pixel> mult0;
25  Multiply32<Pixel> mult1;
26  Multiply32<Pixel> mult2;
27  Multiply32<Pixel> mult3;
    // Not initialized by the constructor: setBlur() must be called before
    // operator() is used.
28  unsigned blur;
29 #ifdef __SSE2__
    // SSE2 code path taken by operator() when sizeof(Pixel) == 4.
30  void blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth);
31 #endif
32 };
33 
34 
// Constructor: forwards pixelOps_ to the Scaler3<Pixel> base, initializes the
// pixelOps, scanline and settings members from the arguments and creates the
// Blur_1on3 helper via make_unique.
// NOTE(review): listing line 36 (the declarator carrying the function name)
// is missing from this extract; presumably
// 'Simple3xScaler<Pixel>::Simple3xScaler(' -- confirm against the real file.
35 template <class Pixel>
37  const PixelOperations<Pixel>& pixelOps_,
38  const RenderSettings& settings_)
39  : Scaler3<Pixel>(pixelOps_)
40  , pixelOps(pixelOps_)
41  , scanline(pixelOps_)
42  , blur_1on3(make_unique<Blur_1on3<Pixel>>(pixelOps_))
43  , settings(settings_)
44 {
45 }
46 
// Function with an empty body.
// NOTE(review): listing line 48 (the declarator) is missing from this
// extract; presumably this is the out-of-line destructor -- confirm against
// the real file.
47 template <class Pixel>
49 {
50 }
51 
// Vertical 1 -> 3 scaling: every source line is horizontally scaled by
// 'scale' into one output line, duplicated into a second output line, and
// the third output line is produced by scanline.draw() from this scaled line
// and the next scaled source line.
// NOTE(review): listing lines 53 (declarator) and 56 (the last parameter,
// used in the body as 'scale') are missing from this extract; presumably
// this is doScale1() -- confirm against the real file.
52 template <typename Pixel>
54  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
55  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
57 {
58  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
59  int scanlineFactor = settings.getScanlineFactor();
60  unsigned dstWidth = dst.getWidth();
61  unsigned y = dstStartY;
    // First source line: scale into output line y ...
62  auto* srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
63  auto* dstLine0 = dst.acquireLine(y + 0);
64  scale(srcLine, dstLine0, dstWidth);
65 
    // ... and duplicate it into output line y + 1.
66  Scale_1on1<Pixel> copy;
67  auto* dstLine1 = dst.acquireLine(y + 1);
68  copy(dstLine0, dstLine1, dstWidth);
69 
    // Each iteration first produces the two lines of the NEXT group
    // (y + 3, y + 4), because the scanline row y + 2 is drawn from both its
    // upper (y + 0) and lower (y + 3) neighbour.
70  for (/* */; (y + 4) < dstEndY; y += 3, srcStartY += 1) {
71  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
72  auto* dstLine3 = dst.acquireLine(y + 3);
73  scale(srcLine, dstLine3, dstWidth);
74 
75  auto* dstLine4 = dst.acquireLine(y + 4);
76  copy(dstLine3, dstLine4, dstWidth);
77 
78  auto* dstLine2 = dst.acquireLine(y + 2);
79  scanline.draw(dstLine0, dstLine3, dstLine2,
80  scanlineFactor, dstWidth);
81 
82  dst.releaseLine(y + 0, dstLine0);
83  dst.releaseLine(y + 1, dstLine1);
84  dst.releaseLine(y + 2, dstLine2);
    // The lines of the next group become the current group.
85  dstLine0 = dstLine3;
86  dstLine1 = dstLine4;
87  }
    // Last group: the following source line is only needed as the lower
    // neighbour of the final scanline row, so it is scaled into a temporary
    // buffer instead of an output line.
88  srcLine = src.getLinePtr(srcStartY, srcWidth, buf);
89  VLA_SSE_ALIGNED(Pixel, buf2, dstWidth);
90  scale(srcLine, buf2, dstWidth);
91 
92  auto* dstLine2 = dst.acquireLine(y + 2);
93  scanline.draw(dstLine0, buf2, dstLine2, scanlineFactor, dstWidth);
94  dst.releaseLine(y + 0, dstLine0);
95  dst.releaseLine(y + 1, dstLine1);
96  dst.releaseLine(y + 2, dstLine2);
97 }
98 
// Vertical 2 -> 3 scaling: each iteration consumes two source lines and
// writes three output lines. The first and third output line are the
// horizontally scaled source lines; the middle one is produced by
// scanline.draw() from those two.
// NOTE(review): listing line 100 (the declarator) is missing from this
// extract; presumably this is doScale2() -- confirm against the real file.
99 template <typename Pixel>
101  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
102  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY,
103  PolyLineScaler<Pixel>& scale)
104 {
105  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
106  int scanlineFactor = settings.getScanlineFactor();
107  unsigned dstWidth = dst.getWidth();
108  for (unsigned srcY = srcStartY, dstY = dstStartY; dstY < dstEndY;
109  srcY += 2, dstY += 3) {
110  auto* srcLine0 = src.getLinePtr(srcY + 0, srcWidth, buf);
111  auto* dstLine0 = dst.acquireLine(dstY + 0);
112  scale(srcLine0, dstLine0, dstWidth);
113 
    // 'buf' is passed to getLinePtr() again here; srcLine0 has already
    // been consumed by scale() above.
114  auto* srcLine1 = src.getLinePtr(srcY + 1, srcWidth, buf);
115  auto* dstLine2 = dst.acquireLine(dstY + 2);
116  scale(srcLine1, dstLine2, dstWidth);
117 
118  auto* dstLine1 = dst.acquireLine(dstY + 1);
119  scanline.draw(dstLine0, dstLine2, dstLine1,
120  scanlineFactor, dstWidth);
121 
122  dst.releaseLine(dstY + 0, dstLine0);
123  dst.releaseLine(dstY + 1, dstLine1);
124  dst.releaseLine(dstY + 2, dstLine2);
125  }
126 }
127 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale1().
// NOTE(review): listing lines 129 (declarator) and 133 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
128 template <class Pixel>
130  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
131  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
132 {
134  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
135 }
136 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale2().
// NOTE(review): listing lines 138 (declarator) and 142 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
137 template <class Pixel>
139  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
140  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
141 {
143  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
144 }
145 
// Forwards to doScale1(), choosing the horizontal scaler from the blur
// setting: a non-zero 'settings.getBlurFactor() / 3' selects the shared
// Blur_1on3 helper (wrapped by reference in PolyScaleRef); otherwise a
// non-blurring scaler 'op' is used.
// NOTE(review): listing lines 147 (declarator) and 160 (declaration of 'op'
// in the else branch) are missing from this extract, so the function name
// and the non-blurring scaler type cannot be confirmed here.
146 template <class Pixel>
148  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
149  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
150 {
151  if (unsigned blur = settings.getBlurFactor() / 3) {
152  blur_1on3->setBlur(blur);
153  PolyScaleRef<Pixel, Blur_1on3<Pixel>> op(*blur_1on3);
154  doScale1(src, srcStartY, srcEndY, srcWidth,
155  dst, dstStartY, dstEndY, op);
156  } else {
157  // No blurring: this is an optimization but it's also needed
158  // for correctness (otherwise there's an overflow in 0.16 fixed
159  // point arithmetic).
161  doScale1(src, srcStartY, srcEndY, srcWidth,
162  dst, dstStartY, dstEndY, op);
163  }
164 }
165 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale2().
// NOTE(review): listing lines 167 (declarator) and 171 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
166 template <class Pixel>
168  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
169  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
170 {
172  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
173 }
174 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale1().
// NOTE(review): listing lines 176 (declarator) and 180 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
175 template <class Pixel>
177  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
178  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
179 {
181  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
182 }
183 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale2().
// NOTE(review): listing lines 185 (declarator) and 189 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
184 template <class Pixel>
186  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
187  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
188 {
190  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
191 }
192 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale1().
// NOTE(review): listing lines 194 (declarator) and 198 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
193 template <class Pixel>
195  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
196  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
197 {
199  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
200 }
201 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale2().
// NOTE(review): listing lines 203 (declarator) and 207 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
202 template <class Pixel>
204  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
205  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
206 {
208  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
209 }
210 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale1().
// NOTE(review): listing lines 212 (declarator) and 216 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
211 template <class Pixel>
213  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
214  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
215 {
217  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
218 }
219 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale2().
// NOTE(review): listing lines 221 (declarator) and 225 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
220 template <class Pixel>
222  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
223  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
224 {
226  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
227 }
228 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale1().
// NOTE(review): listing lines 230 (declarator) and 234 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
229 template <class Pixel>
231  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
232  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
233 {
235  doScale1(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
236 }
237 
// Thin wrapper: forwards all arguments plus a line scaler 'op' to doScale2().
// NOTE(review): listing lines 239 (declarator) and 243 (declaration of 'op')
// are missing from this extract, so the function name and the concrete
// horizontal scaler cannot be confirmed here.
238 template <class Pixel>
240  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
241  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
242 {
244  doScale2(src, srcStartY, srcEndY, srcWidth, dst, dstStartY, dstEndY, op);
245 }
246 
// Fills output lines for single-colour source lines, three output lines per
// source line: two in the line colour and one darkened via scanline.darken().
// When the region does not end at the bottom of 'dst', the last group is not
// filled here but handed to dispatchScale() using the width of the following
// source line.
// NOTE(review): listing line 248 (the declarator) is missing from this
// extract, so the function name cannot be confirmed here.
247 template <class Pixel>
249  FrameSource& src, unsigned srcStartY, unsigned srcEndY,
250  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
251 {
252  int scanlineFactor = settings.getScanlineFactor();
253 
254  unsigned dstHeight = dst.getHeight();
    // Stop one group (3 output lines) early unless the region reaches the
    // bottom of the output.
255  unsigned stopDstY = (dstEndY == dstHeight)
256  ? dstEndY : dstEndY - 3;
257  unsigned srcY = srcStartY, dstY = dstStartY;
258  for (/* */; dstY < stopDstY; srcY += 1, dstY += 3) {
259  Pixel color0 = src.getLineColor<Pixel>(srcY);
260  Pixel color1 = scanline.darken(color0, scanlineFactor);
261  dst.fillLine(dstY + 0, color0);
262  dst.fillLine(dstY + 1, color0);
263  dst.fillLine(dstY + 2, color1);
264  }
265  if (dstY != dstHeight) {
    // The asserts state the expectation that the current line is blank
    // (width 1) while the next one is not.
266  unsigned nextLineWidth = src.getLineWidth(srcY + 1);
267  assert(src.getLineWidth(srcY) == 1);
268  assert(nextLineWidth != 1);
269  this->dispatchScale(src, srcY, srcEndY, nextLineWidth,
270  dst, dstY, dstEndY);
271  }
272 }
273 
// Fills output lines for single-colour source lines, two source lines per
// three output lines: the first and third output line take the colours of
// the two source lines, the middle one the result of scanline.darken() on
// both colours.
// NOTE(review): listing line 275 (the declarator) is missing from this
// extract, so the function name cannot be confirmed here.
274 template <class Pixel>
276  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/,
277  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
278 {
279  int scanlineFactor = settings.getScanlineFactor();
280  for (unsigned srcY = srcStartY, dstY = dstStartY;
281  dstY < dstEndY; srcY += 2, dstY += 3) {
282  Pixel color0 = src.getLineColor<Pixel>(srcY + 0);
283  Pixel color1 = src.getLineColor<Pixel>(srcY + 1);
284  Pixel color01 = scanline.darken(color0, color1, scanlineFactor);
285  dst.fillLine(dstY + 0, color0);
286  dst.fillLine(dstY + 1, color01);
287  dst.fillLine(dstY + 2, color1);
288  }
289 }
290 
291 
292 // class Blur_1on3
293 
// Constructor: initializes the four weight multipliers from pixelOps.
// The 'blur' member is not initialized here.
// NOTE(review): listing line 295 (the declarator) is missing from this
// extract; presumably 'Blur_1on3<Pixel>::Blur_1on3(const
// PixelOperations<Pixel>& pixelOps)' -- confirm against the real file.
294 template <class Pixel>
296  : mult0(pixelOps)
297  , mult1(pixelOps)
298  , mult2(pixelOps)
299  , mult3(pixelOps)
300 {
301 }
302 
303 #ifdef __SSE2__
// SSE2 implementation of the 1 -> 3 horizontal blur for 4-byte pixels.
// Preconditions (checked by the asserts below): srcWidth is a multiple of 4
// and at least 8, and both pointers are 16-byte aligned.
// Reads srcWidth pixels from in_ and writes 3 * srcWidth pixels to out_.
//
// The weights c0..c3 are 16-bit fractions; _mm_mulhi_epu16 keeps the high
// 16 bits of each 16x16-bit product.
//
// Variable naming: 'a_b_' holds pixels a and b widened to 16 bits per
// channel (low / high 64 bits), 'a_a_' holds pixel a in both halves,
// 'a0a1' holds pixel a scaled by c0 (low half) and by c1 (high half),
// 'xx' marks a zeroed half, and 'pNM' holds output pixels N and M.
// NOTE(review): shuffle<...>() is a helper that is not defined in this file.
304 template<class Pixel>
305 void Blur_1on3<Pixel>::blur_SSE(const Pixel* in_, Pixel* out_, size_t srcWidth)
306 {
307  if (sizeof(Pixel) != 4) {
308  assert(false); return; // only 32-bpp
309  }
310 
311  assert((srcWidth % 4) == 0);
312  assert(srcWidth >= 8);
313  assert((size_t(in_ ) % 16) == 0);
314  assert((size_t(out_) % 16) == 0);
315 
316  unsigned alpha = blur * 256;
317  unsigned c0 = alpha / 2;
318  unsigned c1 = alpha + c0;
319  unsigned c2 = 0x10000 - c1;
320  unsigned c3 = 0x10000 - alpha;
321  __m128i C0C1 = _mm_set_epi16(c1, c1, c1, c1, c0, c0, c0, c0);
322  __m128i C1C0 = _mm_shuffle_epi32(C0C1, 0x4E);
323  __m128i C2C3 = _mm_set_epi16(c3, c3, c3, c3, c2, c2, c2, c2);
324  __m128i C3C2 = _mm_shuffle_epi32(C2C3, 0x4E);
325 
    // 'in' and 'out' point at the LAST group of 4 input / 12 output pixels;
    // 'x' is a negative byte offset that counts up to 0, so the loop handles
    // every group except the last one, which is handled after the loop.
326  size_t tmp = srcWidth - 4;
327  auto* in = reinterpret_cast<const char*>(in_ + tmp);
328  auto* out = reinterpret_cast< char*>(out_ + 3 * tmp);
329  auto x = -ptrdiff_t(tmp * sizeof(Pixel));
330 
331  __m128i ZERO = _mm_setzero_si128();
332 
333  // Prepare first iteration (duplicate left border pixel)
334  __m128i abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x));
335  __m128i a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
336  __m128i a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
337  __m128i a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
338  __m128i d1d0 = _mm_shuffle_epi32(a0a1, 0x4E); // left border
339 
340  // At the start of each iteration the following vars are live:
341  // abcd, a_b_, a_a_, a0a1, d1d0
342  // Each iteration reads 4 and produces 12 pixels.
343  do {
344  // p01
345  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
346  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
347  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
348  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
349  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
350  // p23
351  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
352  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
353  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
354  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
355  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
356  __m128i p0123 = _mm_packus_epi16(p01, p23);
357  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 0),
358  p0123);
359 
360  // p45
361  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
362  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
363  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
364  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
365  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
366  // p67
367  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
368  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
369  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
370  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
371  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
372  __m128i p4567 = _mm_packus_epi16(p45, p67);
373  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 16),
374  p4567);
375 
376  // p89
377  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
378  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
379  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
380  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
381  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
382  // pab
383  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
    // Load the next group of 4 pixels: the last output pixel of this
    // group needs the first pixel of the next group as right neighbour.
384  abcd = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 16));
385  a_b_ = _mm_unpacklo_epi8(abcd, ZERO);
386  a_a_ = _mm_unpacklo_epi64(a_b_, a_b_);
387  a0a1 = _mm_mulhi_epu16(a_a_, C0C1);
388  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
389  __m128i p89ab = _mm_packus_epi16(p89, pab);
390  _mm_store_si128(reinterpret_cast<__m128i*>(out + 3 * x + 32),
391  p89ab);
392 
393  x += 16;
394  } while (x < 0);
395 
396  // Last iteration (duplicate right border pixel)
397  // p01
398  __m128i a2a3 = _mm_mulhi_epu16(a_a_, C2C3);
399  __m128i b_b_ = _mm_unpackhi_epi64(a_b_, a_b_);
400  __m128i b1b0 = _mm_mulhi_epu16(b_b_, C1C0);
401  __m128i xxb0 = _mm_unpackhi_epi64(ZERO, b1b0);
402  __m128i p01 = _mm_add_epi16(_mm_add_epi16(d1d0, a2a3), xxb0);
403  // p23
404  __m128i xxa1 = _mm_unpackhi_epi64(ZERO, a0a1);
405  __m128i b3b2 = _mm_mulhi_epu16(b_b_, C3C2);
406  __m128i a2b2 = shuffle<0xE4>(a2a3, b3b2);
407  __m128i b1xx = _mm_unpacklo_epi64(b1b0, ZERO);
408  __m128i p23 = _mm_add_epi16(_mm_add_epi16(xxa1, a2b2), b1xx);
409  __m128i p0123 = _mm_packus_epi16(p01, p23);
410  _mm_store_si128(reinterpret_cast<__m128i*>(out + 0),
411  p0123);
412 
413  // p45
414  __m128i a0xx = _mm_unpacklo_epi64(a0a1, ZERO);
415  __m128i c_d_ = _mm_unpackhi_epi8(abcd, ZERO);
416  __m128i c_c_ = _mm_unpacklo_epi64(c_d_, c_d_);
417  __m128i c0c1 = _mm_mulhi_epu16(c_c_, C0C1);
418  __m128i p45 = _mm_add_epi16(_mm_add_epi16(a0xx, b3b2), c0c1);
419  // p67
420  __m128i c2c3 = _mm_mulhi_epu16(c_c_, C2C3);
421  __m128i d_d_ = _mm_unpackhi_epi64(c_d_, c_d_);
422  d1d0 = _mm_mulhi_epu16(d_d_, C1C0);
423  __m128i xxd0 = _mm_unpackhi_epi64(ZERO, d1d0);
424  __m128i p67 = _mm_add_epi16(_mm_add_epi16(b1b0, c2c3), xxd0);
425  __m128i p4567 = _mm_packus_epi16(p45, p67);
426  _mm_store_si128(reinterpret_cast<__m128i*>(out + 16),
427  p4567);
428 
429  // p89
430  __m128i xxc1 = _mm_unpackhi_epi64(ZERO, c0c1);
431  __m128i d3d2 = _mm_mulhi_epu16(d_d_, C3C2);
432  __m128i c2d2 = shuffle<0xE4>(c2c3, d3d2);
433  __m128i d1xx = _mm_unpacklo_epi64(d1d0, ZERO);
434  __m128i p89 = _mm_add_epi16(_mm_add_epi16(xxc1, c2d2), d1xx);
435  // pab
436  __m128i c0xx = _mm_unpacklo_epi64(c0c1, ZERO);
437  a0a1 = _mm_shuffle_epi32(d1d0, 0x4E); // right border
438  __m128i pab = _mm_add_epi16(_mm_add_epi16(c0xx, d3d2), a0a1);
439  __m128i p89ab = _mm_packus_epi16(p89, pab);
440  _mm_store_si128(reinterpret_cast<__m128i*>(out + 32),
441  p89ab);
442 }
443 #endif
444 
// Blurring 1 -> 3 line scaler: reads srcWidth = dstWidth / 3 pixels from 'in'
// and writes three output pixels per source pixel to 'out'. With SSE2 and
// 4-byte pixels the work is delegated to blur_SSE(); otherwise the scalar
// routine below is used.
// NOTE(review): listing line 446 (the declarator) is missing from this
// extract; presumably 'void Blur_1on3<Pixel>::operator()(' -- confirm
// against the real file.
445 template <class Pixel>
447  const Pixel* __restrict in, Pixel* __restrict out,
448  size_t dstWidth)
449 {
450  /* The following code is equivalent to this loop. It is 2x unrolled
451  * and common subexpressions have been eliminated. The last iteration
452  * is also moved outside the for loop.
453  *
454  * unsigned c0 = blur / 2;
455  * unsigned c1 = c0 + blur;
456  * unsigned c2 = 256 - c1;
457  * unsigned c3 = 256 - 2 * c0;
458  * Pixel prev, curr, next;
459  * prev = curr = next = in[0];
460  * size_t srcWidth = dstWidth / 3;
461  * for (unsigned x = 0; x < srcWidth; ++x) {
462  * if (x != (srcWidth - 1)) next = in[x + 1];
463  * out[3 * x + 0] = mul(c1, prev) + mul(c2, curr);
464  * out[3 * x + 1] = mul(c0, prev) + mul(c3, curr) + mul(c0, next);
465  * out[3 * x + 2] = mul(c2, curr) + mul(c1, next);
466  * prev = curr;
467  * curr = next;
468  * }
469  */
470  size_t srcWidth = dstWidth / 3;
471 #ifdef __SSE2__
472  if (sizeof(Pixel) == 4) {
473  blur_SSE(in, out, srcWidth);
474  return;
475  }
476 #endif
477 
478  // C++ routine, both 16bpp and 32bpp
479  unsigned c0 = blur / 2;
480  unsigned c1 = blur + c0;
481  unsigned c2 = 256 - c1;
482  unsigned c3 = 256 - 2 * c0;
483  mult0.setFactor32(c0);
484  mult1.setFactor32(c1);
485  mult2.setFactor32(c2);
486  mult3.setFactor32(c3);
487 
    // f0/f1 and g0/g1 carry the c0- and c1-weighted previous pixel for the
    // two halves of the unrolled loop; both start at in[0], which duplicates
    // the left border pixel.
488  Pixel p0 = in[0];
489  Pixel p1;
490  uint32_t f0 = mult0.mul32(p0);
491  uint32_t f1 = mult1.mul32(p0);
492  uint32_t g0 = f0;
493  uint32_t g1 = f1;
494 
    // NOTE(review): 'srcWidth - 2' is unsigned arithmetic and the unrolled
    // body handles pixels in pairs; this appears to assume srcWidth is even
    // and >= 2 -- confirm that callers guarantee it.
495  size_t x;
496  for (x = 0; x < (srcWidth - 2); x += 2) {
497  uint32_t g2 = mult2.mul32(p0);
498  out[3 * x + 0] = mult0.conv32(g2 + f1);
499  p1 = in[x + 1];
500  uint32_t t0 = mult0.mul32(p1);
501  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
502  f0 = t0;
503  f1 = mult1.mul32(p1);
504  out[3 * x + 2] = mult0.conv32(g2 + f1);
505 
506  uint32_t f2 = mult2.mul32(p1);
507  out[3 * x + 3] = mult0.conv32(f2 + g1);
508  p0 = in[x + 2];
509  uint32_t t1 = mult0.mul32(p0);
510  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + t1);
511  g0 = t1;
512  g1 = mult1.mul32(p0);
513  out[3 * x + 5] = mult0.conv32(g1 + f2);
514  }
    // Final pair of source pixels: there is no pixel to the right of p1, so
    // p1 is reused as its own right neighbour. For the very last output
    // pixel the weights c2 + c1 add up to 256, so it is simply p1.
515  uint32_t g2 = mult2.mul32(p0);
516  out[3 * x + 0] = mult0.conv32(g2 + f1);
517  p1 = in[x + 1];
518  uint32_t t0 = mult0.mul32(p1);
519  out[3 * x + 1] = mult0.conv32(f0 + mult3.mul32(p0) + t0);
520  f0 = t0;
521  f1 = mult1.mul32(p1);
522  out[3 * x + 2] = mult0.conv32(g2 + f1);
523 
524  uint32_t f2 = mult2.mul32(p1);
525  out[3 * x + 3] = mult0.conv32(f2 + g1);
526  out[3 * x + 4] = mult0.conv32(g0 + mult3.mul32(p1) + f0);
527  out[3 * x + 5] = p1;
528 }
529 
// Entry point that optionally superimposes a second frame before scaling.
// When 'superImpose' is non-null, 'src' is wrapped in a
// SuperImposedVideoFrame, srcWidth is replaced by that frame's width for
// line srcStartY, and the wrapped frame is dispatched; otherwise 'src' is
// dispatched unchanged.
// NOTE(review): listing line 531 (the declarator) is missing from this
// extract, so the function name cannot be confirmed here.
530 template <class Pixel>
532  FrameSource& src, const RawFrame* superImpose,
533  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
534  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
535 {
536  if (superImpose) {
537  SuperImposedVideoFrame<Pixel> sf(src, *superImpose, pixelOps);
538  srcWidth = sf.getLineWidth(srcStartY);
539  this->dispatchScale(sf, srcStartY, srcEndY, srcWidth,
540  dst, dstStartY, dstEndY);
541  } else {
542  this->dispatchScale(src, srcStartY, srcEndY, srcWidth,
543  dst, dstStartY, dstEndY);
544  }
545 }
546 
547 // Force template instantiation.
// Only the pixel types enabled by the HAVE_16BPP / HAVE_32BPP macros are
// instantiated.
548 #if HAVE_16BPP
549 template class Simple3xScaler<uint16_t>;
550 #endif
551 #if HAVE_32BPP
552 template class Simple3xScaler<uint32_t>;
553 #endif
554 
555 } // namespace openmsx
int getScanlineFactor() const
The alpha value [0..255] of the gap between scanlines.
This class represents a frame that is the (per-pixel) alpha-blend of a (laser-disc) video frame and a...
Helper class to perform 'pixel x scalar' calculations.
Definition: Multiply32.hh:14
void setBlur(unsigned blur_)
virtual void fillLine(unsigned y, Pixel color)=0
Simple3xScaler(const PixelOperations< Pixel > &pixelOps, const RenderSettings &renderSettings)
virtual Pixel * acquireLine(unsigned y)=0
Polymorphic wrapper around another line scaler.
Definition: LineScalers.hh:310
Polymorphic line scaler.
Definition: LineScalers.hh:282
uint32_t Pixel
Interface for getting lines from a video frame.
Definition: FrameSource.hh:14
mat4 scale(const vec3 &xyz)
Definition: gl_transform.hh:19
virtual unsigned getHeight() const =0
A video frame as output by the VDP scanline conversion unit, before any postprocessing filters are ap...
Definition: RawFrame.hh:25
int getBlurFactor() const
The amount of horizontal blur [0..256].
Blur_1on3(const PixelOperations< Pixel > &pixelOps)
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
virtual void releaseLine(unsigned y, Pixel *buf)=0
Like PolyScale above, but instead keeps a reference to the actual scaler.
Definition: LineScalers.hh:337
Class containing all settings for renderers.
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:91
void operator()(const Pixel *in, Pixel *out, size_t dstWidth)
unsigned getLineWidth(unsigned line) const override
Gets the number of display pixels on the given line.
const Pixel getLineColor(unsigned line) const
Get the (single) color of the given line.
Definition: FrameSource.hh:74
virtual unsigned getLineWidth(unsigned line) const =0
Gets the number of display pixels on the given line.
std::unique_ptr< T > make_unique()
Definition: memory.hh:27
void dispatchScale(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Definition: Scaler3.cc:223
virtual unsigned getWidth() const =0
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
Base class for 3x scalers.
Definition: Scaler3.hh:11