openMSX
Simple2xScaler.cc
Go to the documentation of this file.
1 #include "Simple2xScaler.hh"
3 #include "LineScalers.hh"
4 #include "RawFrame.hh"
5 #include "ScalerOutput.hh"
6 #include "RenderSettings.hh"
7 #include "unreachable.hh"
8 #include "vla.hh"
9 #include <cassert>
10 #include <cstddef>
11 #include <cstdint>
12 #ifdef __SSE2__
13 #include <emmintrin.h>
14 #endif
15 
16 namespace openmsx {
17 
18 // class Simple2xScaler
19 
// Constructor. Forwards pixelOps_ to the Scaler2<Pixel> base class, stores
// renderSettings in 'settings' and pixelOps_ in 'pixelOps', and constructs
// the three multipliers (mult1, mult2, mult3) and the 'scanline' helper.
//
// NOTE(review): the declarator line (listing line 21) is absent from this
// view, so the parameter list below appears to start in mid-air.
// NOTE(review): mult1..mult3 and scanline are initialized from the member
// 'pixelOps', not from the parameter 'pixelOps_'. That presumably relies on
// 'pixelOps' being declared before them in the class -- confirm in the header.
20 template <class Pixel>
22  const PixelOperations<Pixel>& pixelOps_,
23  RenderSettings& renderSettings)
24  : Scaler2<Pixel>(pixelOps_)
25  , settings(renderSettings)
26  , pixelOps(pixelOps_)
27  , mult1(pixelOps)
28  , mult2(pixelOps)
29  , mult3(pixelOps)
30  , scanline(pixelOps)
31 {
32 }
33 
// Scales a run of single-color source lines to twice the height: every
// source line fills one destination line with its color and the following
// destination line with that color darkened by the scanline factor.
//
// When the destination range does not end at the bottom of the output
// (dstEndY != dst.getHeight()) the loop stops one source line early and the
// remaining lines are handed to dispatchScale(), using the width of the next
// source line. The asserts document the expectation at that point: the
// current line is still a single-color line (width 1) and the next is not.
//
// NOTE(review): the declarator line (listing line 35) is absent from this
// view.
34 template <class Pixel>
36  FrameSource& src, unsigned srcStartY, unsigned srcEndY,
37  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
38 {
39  int scanlineFactor = settings.getScanlineFactor();
40 
41  unsigned dstHeight = dst.getHeight();
// Keep the last pair of destination lines for dispatchScale() unless this
// range runs up to the very bottom of the output.
42  unsigned stopDstY = (dstEndY == dstHeight)
43  ? dstEndY : dstEndY - 2;
44  unsigned srcY = srcStartY, dstY = dstStartY;
// One source line produces two destination lines.
45  for (/* */; dstY < stopDstY; srcY += 1, dstY += 2) {
46  Pixel color0 = src.getLineColor<Pixel>(srcY);
47  dst.fillLine(dstY + 0, color0);
48  Pixel color1 = scanline.darken(color0, scanlineFactor);
49  dst.fillLine(dstY + 1, color1);
50  }
51  if (dstY != dstHeight) {
52  unsigned nextLineWidth = src.getLineWidth(srcY + 1);
53  assert(src.getLineWidth(srcY) == 1);
54  assert(nextLineWidth != 1);
55  this->dispatchScale(src, srcY, srcEndY, nextLineWidth,
56  dst, dstY, dstEndY);
57  }
58 }
59 
60 #ifdef __SSE2__
61 
// Combines upper-half of 'x' with lower half of 'y'.
// The result holds the upper 64 bits of 'x' in its lower half and the lower
// 64 bits of 'y' in its upper half.
static inline __m128i shuffle(__m128i x, __m128i y)
{
    // mm_shuffle_pd() actually shuffles 64-bit floating point values, we
    // need to shuffle integers. Though floats and ints are stored in the
    // same xmmN registers. So this instruction does the right thing.
    // However (some?) x86 CPUs keep the float and integer interpretations
    // of these registers in different physical locations in the chip and
    // there is some overhead on switching between these interpretations.
    // So the casts in the statement below don't generate any instructions,
    // but they still can cause overhead on (some?) CPUs.
    return _mm_castpd_si128(_mm_shuffle_pd(
        _mm_castsi128_pd(x), _mm_castsi128_pd(y), 1));
}

// 32bpp
//
// Horizontally blurs one line of 32-bit pixels while doubling its width.
// Every input pixel 'curr' produces two output pixels, computed separately
// for each 8-bit channel as
//     left  = (c1 * prev + c2 * curr) >> 8
//     right = (c1 * next + c2 * curr) >> 8
// The first pixel of the line uses itself as 'prev' and the last pixel uses
// itself as 'next'.
//
// in_    source pixels, 16-byte aligned
// out_   destination, 16-byte aligned, room for 2 * width pixels
// c1_    weight of the neighbouring pixel
// c2_    weight of the pixel itself; c1_ + c2_ must not exceed 256 so that
//        the 16-bit per-channel sums cannot overflow
// width  number of input pixels; a multiple of 4 and at least 8
static void blur1on2_SSE2(
    const uint32_t* __restrict in_, uint32_t* __restrict out_,
    unsigned c1_, unsigned c2_, size_t width)
{
    width *= sizeof(uint32_t); // in bytes
    assert(width >= (2 * sizeof(__m128i)));
    assert((reinterpret_cast<uintptr_t>(in_ ) % sizeof(__m128i)) == 0);
    assert((reinterpret_cast<uintptr_t>(out_) % sizeof(__m128i)) == 0);

    // 'x' is a negative byte offset that counts up to 0. 'in' and 'out'
    // are biased so that 'in + x' / 'out + 2 * x' address the block that
    // is currently processed and 'in' / 'out' themselves address the LAST
    // block of the line.
    ptrdiff_t x = -ptrdiff_t(width - sizeof(__m128i));
    auto* in  = reinterpret_cast<const char*>(in_ ) -     x;
    auto* out = reinterpret_cast<      char*>(out_) - 2 * x;

    // Setup first iteration
    __m128i c1 = _mm_set1_epi16(c1_);
    __m128i c2 = _mm_set1_epi16(c2_);
    __m128i zero = _mm_setzero_si128();

    // Load the FIRST block of the line. This must be 'in + x': a plain
    // 'in' would read the last block, so the first four input pixels
    // would never be used and the start of the output line would be
    // built from the pixels at the end of the input line.
    __m128i abcd = *reinterpret_cast<const __m128i*>(in + x);
    __m128i a0b0 = _mm_unpacklo_epi8(abcd, zero);
    // There is no pixel left of 'a', so 'a' acts as its own predecessor.
    __m128i d0a0 = _mm_shuffle_epi32(a0b0, 0x44);
    __m128i d1a1 = _mm_mullo_epi16(c1, d0a0);

    // Each iteration reads 4 pixels and generates 8 pixels
    do {
        // At the start of each iteration these variables are live:
        //   abcd, a0b0, d1a1
        __m128i c0d0 = _mm_unpackhi_epi8(abcd, zero);
        __m128i b0c0 = shuffle(a0b0, c0d0);
        __m128i a2b2 = _mm_mullo_epi16(c2, a0b0);
        __m128i b1c1 = _mm_mullo_epi16(c1, b0c0);
        __m128i daab = _mm_srli_epi16(_mm_add_epi16(d1a1, a2b2), 8);
        __m128i abbc = _mm_srli_epi16(_mm_add_epi16(a2b2, b1c1), 8);
        __m128i abab = _mm_packus_epi16(daab, abbc);
        // 0xd8 reorders (a-left, b-left, a-right, b-right) into
        // (a-left, a-right, b-left, b-right).
        *reinterpret_cast<__m128i*>(out + 2 * x) =
            _mm_shuffle_epi32(abab, 0xd8);
        abcd = *reinterpret_cast<const __m128i*>(in + x + 16);
        a0b0 = _mm_unpacklo_epi8(abcd, zero);
        __m128i d0a0_= shuffle(c0d0, a0b0);
        __m128i c2d2 = _mm_mullo_epi16(c2, c0d0);
        d1a1 = _mm_mullo_epi16(c1, d0a0_);
        __m128i bccd = _mm_srli_epi16(_mm_add_epi16(b1c1, c2d2), 8);
        __m128i cdda = _mm_srli_epi16(_mm_add_epi16(c2d2, d1a1), 8);
        __m128i cdcd = _mm_packus_epi16(bccd, cdda);
        *reinterpret_cast<__m128i*>(out + 2 * x + 16) =
            _mm_shuffle_epi32(cdcd, 0xd8);
        x += 16;
    } while (x < 0);

    // Last iteration (because this doesn't need to read new input)
    __m128i c0d0 = _mm_unpackhi_epi8(abcd, zero);
    __m128i b0c0 = shuffle(a0b0, c0d0);
    __m128i a2b2 = _mm_mullo_epi16(c2, a0b0);
    __m128i b1c1 = _mm_mullo_epi16(c1, b0c0);
    __m128i daab = _mm_srli_epi16(_mm_add_epi16(d1a1, a2b2), 8);
    __m128i abbc = _mm_srli_epi16(_mm_add_epi16(a2b2, b1c1), 8);
    __m128i abab = _mm_packus_epi16(daab, abbc);
    *reinterpret_cast<__m128i*>(out) = _mm_shuffle_epi32(abab, 0xd8);
    // There is no pixel right of 'd', so 'd' acts as its own successor.
    __m128i d0d0 = _mm_shuffle_epi32(c0d0, 0xee);
    __m128i c2d2 = _mm_mullo_epi16(c2, c0d0);
    __m128i d1d1 = _mm_mullo_epi16(c1, d0d0);
    __m128i bccd = _mm_srli_epi16(_mm_add_epi16(b1c1, c2d2), 8);
    __m128i cddd = _mm_srli_epi16(_mm_add_epi16(c2d2, d1d1), 8);
    __m128i cdcd = _mm_packus_epi16(bccd, cddd);
    *reinterpret_cast<__m128i*>(out + 16) = _mm_shuffle_epi32(cdcd, 0xd8);
}
144 
145 // no SSE2 16bpp routine yet (probably not worth the effort)
// This overload gives calls made with 16-bit pixel pointers something to
// resolve to. The call site visible in this file is guarded by
// 'sizeof(Pixel) == 4', so this body is not expected to execute; UNREACHABLE
// marks that.
146 static void blur1on2_SSE2(const uint16_t* /*in*/, uint16_t* /*out*/,
147  unsigned /*c1*/, unsigned /*c2*/, size_t /*width*/)
148 {
149  UNREACHABLE;
150 }
151 
152 #endif
153 
// Blurs one source line horizontally and writes it at double width: each of
// the srcWidth input pixels produces two output pixels in pOut.
//
// alpha     blur amount; 0 disables blurring, the assert below requires
//           alpha <= 256
// srcWidth  number of input pixels. NOTE(review): the C++ path computes
//           'srcWidth - 2' on a size_t and its tail reads pIn[x + 1], so it
//           appears to need an even srcWidth of at least 2; the SSE2 path
//           additionally asserts at least 8 pixels and 16-byte aligned
//           buffers. Confirm that all callers guarantee this.
//
// NOTE(review): the declarator line (listing line 155) and the declaration of
// 'scale' (listing line 185) are absent from this view.
154 template <class Pixel>
156  const Pixel* __restrict pIn, Pixel* __restrict pOut,
157  unsigned alpha, size_t srcWidth)
158 {
159  /* This routine is functionally equivalent to the following:
160  *
161  * void blur1on2(const Pixel* pIn, Pixel* pOut, unsigned alpha)
162  * {
163  * unsigned c1 = alpha / 4;
164  * unsigned c2 = 256 - c1;
165  *
166  * Pixel prev, curr, next;
167  * prev = curr = pIn[0];
168  *
169  * unsigned x;
170  * for (x = 0; x < (srcWidth - 1); ++x) {
171  * pOut[2 * x + 0] = (c1 * prev + c2 * curr) >> 8;
172  * next = pIn[x + 1];
173  * pOut[2 * x + 1] = (c1 * next + c2 * curr) >> 8;
174  * prev = curr;
175  * curr = next;
176  * }
177  *
178  * pOut[2 * x + 0] = (c1 * prev + c2 * curr) >> 8;
179  * next = curr;
180  * pOut[2 * x + 1] = (c1 * next + c2 * curr) >> 8;
181  * }
182  */
183 
// No blur requested: hand the line to 'scale' with the doubled width.
184  if (alpha == 0) {
186  scale(pIn, pOut, 2 * srcWidth);
187  return;
188  }
189 
190  assert(alpha <= 256);
// c1 weighs the neighbouring pixel, c2 the pixel itself; c1 + c2 == 256.
191  unsigned c1 = alpha / 4;
192  unsigned c2 = 256 - c1;
193 
194 #ifdef __SSE2__
195  if (sizeof(Pixel) == 4) {
196  // SSE2, only 32bpp
197  blur1on2_SSE2(pIn, pOut, c1, c2, srcWidth);
198  return;
199  }
200 #endif
201  // C++ routine, both 16bpp and 32bpp.
202  // The loop is 2x unrolled and all common subexpressions and redundant
203  // assignments have been eliminated. 1 iteration generates 4 pixels.
204  mult1.setFactor32(c1);
205  mult2.setFactor32(c2);
206 
// f0 / f1 hold the c1-weighted value of a neighbouring pixel; both start as
// c1 * pIn[0] because the first pixel has no left neighbour.
207  Pixel p0 = pIn[0];
208  Pixel p1;
209  unsigned f0 = mult1.mul32(p0);
210  unsigned f1 = f0;
211  unsigned tmp;
212 
213  unsigned x;
214  for (x = 0; x < (srcWidth - 2); x += 2) {
215  tmp = mult2.mul32(p0);
216  pOut[2 * x + 0] = mult1.conv32(f1 + tmp);
217 
218  p1 = pIn[x + 1];
219  f1 = mult1.mul32(p1);
220  pOut[2 * x + 1] = mult1.conv32(f1 + tmp);
221 
222  tmp = mult2.mul32(p1);
223  pOut[2 * x + 2] = mult1.conv32(f0 + tmp);
224 
225  p0 = pIn[x + 2];
226  f0 = mult1.mul32(p0);
227  pOut[2 * x + 3] = mult1.conv32(f0 + tmp);
228  }
229 
// Tail: the last two input pixels, handled outside the loop because there is
// no pIn[x + 2] to read.
230  tmp = mult2.mul32(p0);
231  pOut[2 * x + 0] = mult1.conv32(f1 + tmp);
232 
233  p1 = pIn[x + 1];
234  f1 = mult1.mul32(p1);
235  pOut[2 * x + 1] = mult1.conv32(f1 + tmp);
236 
237  tmp = mult2.mul32(p1);
238  pOut[2 * x + 2] = mult1.conv32(f0 + tmp);
239 
// The last pixel has no right neighbour, so 'next' equals the pixel itself
// and the blend (c1 + c2 == 256) reduces to the unmodified pixel.
240  pOut[2 * x + 3] = p1;
241 }
242 
243 #ifdef __SSE2__
244 
245 // 32bpp
246 static void blur1on1_SSE2(
247  const uint32_t* __restrict in_, uint32_t* __restrict out_,
248  unsigned c1_, unsigned c2_, size_t width)
249 {
250  width *= sizeof(uint32_t); // in bytes
251  assert(width >= (2 * sizeof(__m128i)));
252  assert((reinterpret_cast<uintptr_t>(in_ ) % sizeof(__m128i)) == 0);
253  assert((reinterpret_cast<uintptr_t>(out_) % sizeof(__m128i)) == 0);
254 
255  ptrdiff_t x = -ptrdiff_t(width - sizeof(__m128i));
256  auto* in = reinterpret_cast<const char*>(in_ ) - x;
257  auto* out = reinterpret_cast< char*>(out_) - x;
258 
259  // Setup first iteration
260  __m128i c1 = _mm_set1_epi16(c1_);
261  __m128i c2 = _mm_set1_epi16(c2_);
262  __m128i zero = _mm_setzero_si128();
263 
264  __m128i abcd = *reinterpret_cast<const __m128i*>(in);
265  __m128i a0b0 = _mm_unpacklo_epi8(abcd, zero);
266  __m128i d0a0 = _mm_shuffle_epi32(a0b0, 0x44);
267 
268  // Each iteration reads 4 pixels and generates 4 pixels
269  do {
270  // At the start of each iteration these variables are live:
271  // abcd, a0b0, d0a0
272  __m128i c0d0 = _mm_unpackhi_epi8(abcd, zero);
273  __m128i b0c0 = shuffle(a0b0, c0d0);
274  __m128i a2b2 = _mm_mullo_epi16(c2, a0b0);
275  __m128i dbac = _mm_mullo_epi16(c1, _mm_add_epi16(d0a0, b0c0));
276  __m128i aabb = _mm_srli_epi16(_mm_add_epi16(dbac, a2b2), 8);
277  abcd = *reinterpret_cast<const __m128i*>(in + x + 16);
278  a0b0 = _mm_unpacklo_epi8(abcd, zero);
279  d0a0 = shuffle(c0d0, a0b0);
280  __m128i c2d2 = _mm_mullo_epi16(c2, c0d0);
281  __m128i bdca = _mm_mullo_epi16(c1, _mm_add_epi16(b0c0, d0a0));
282  __m128i ccdd = _mm_srli_epi16(_mm_add_epi16(bdca, c2d2), 8);
283  *reinterpret_cast<__m128i*>(out + x) =
284  _mm_packus_epi16(aabb, ccdd);
285  x += 16;
286  } while (x < 0);
287 
288  // Last iteration (because this doesn't need to read new input)
289  __m128i c0d0 = _mm_unpackhi_epi8(abcd, zero);
290  __m128i b0c0 = shuffle(a0b0, c0d0);
291  __m128i a2b2 = _mm_mullo_epi16(c2, a0b0);
292  __m128i dbac = _mm_mullo_epi16(c1, _mm_add_epi16(d0a0, b0c0));
293  __m128i aabb = _mm_srli_epi16(_mm_add_epi16(dbac, a2b2), 8);
294  __m128i d0d0 = _mm_shuffle_epi32(c0d0, 0xee);
295  __m128i c2d2 = _mm_mullo_epi16(c2, c0d0);
296  __m128i bdcd = _mm_mullo_epi16(c1, _mm_add_epi16(b0c0, d0d0));
297  __m128i ccdd = _mm_srli_epi16(_mm_add_epi16(bdcd, c2d2), 8);
298  *reinterpret_cast<__m128i*>(out) = _mm_packus_epi16(aabb, ccdd);
299 }
300 
301 // no SSE2 16bpp routine yet (probably not worth the effort)
// This overload gives calls made with 16-bit pixel pointers something to
// resolve to. The call site visible in this file is guarded by
// 'sizeof(Pixel) == 4', so this body is not expected to execute; UNREACHABLE
// marks that.
302 static void blur1on1_SSE2(const uint16_t* /*in*/, uint16_t* /*out*/,
303  unsigned /*c1*/, unsigned /*c2*/, size_t /*width*/)
304 {
305  UNREACHABLE;
306 }
307 
308 #endif
// Blurs one source line horizontally at unchanged width: each of the
// srcWidth input pixels produces one output pixel in pOut, blended with its
// left and right neighbours.
//
// alpha     blur amount; 0 disables blurring. NOTE(review): unlike the
//           double-width variant in this file there is no
//           'assert(alpha <= 256)' here -- confirm whether that is intended.
// srcWidth  number of pixels. NOTE(review): the C++ path computes
//           'srcWidth - 2' on a size_t and its tail reads pIn[x + 1], so it
//           appears to need an even srcWidth of at least 2; the SSE2 path
//           additionally asserts at least 8 pixels and 16-byte aligned
//           buffers. Confirm that all callers guarantee this.
//
// NOTE(review): the declarator line (listing line 310) is absent from this
// view.
309 template <class Pixel>
311  const Pixel* __restrict pIn, Pixel* __restrict pOut,
312  unsigned alpha, size_t srcWidth)
313 {
314  /* This routine is functionally equivalent to the following:
315  *
316  * void blur1on1(const Pixel* pIn, Pixel* pOut, unsigned alpha)
317  * {
318  * unsigned c1 = alpha / 4;
319  * unsigned c2 = 256 - alpha / 2;
320  *
321  * Pixel prev, curr, next;
322  * prev = curr = pIn[0];
323  *
324  * unsigned x;
325  * for (x = 0; x < (srcWidth - 1); ++x) {
326  * next = pIn[x + 1];
327  * pOut[x] = (c1 * prev + c2 * curr + c1 * next) >> 8;
328  * prev = curr;
329  * curr = next;
330  * }
331  *
332  * next = curr;
333  * pOut[x] = (c1 * prev + c2 * curr + c1 * next) >> 8;
334  * }
335  */
336 
// No blur requested: plain 1:1 copy of the line.
337  if (alpha == 0) {
338  Scale_1on1<Pixel> copy;
339  copy(pIn, pOut, srcWidth);
340  return;
341  }
342 
// c1 weighs each of the two neighbours, c2 the pixel itself.
343  unsigned c1 = alpha / 4;
344  unsigned c2 = 256 - alpha / 2;
345 
346 #ifdef __SSE2__
347  if (sizeof(Pixel) == 4) {
348  // SSE2, only 32bpp
349  blur1on1_SSE2(pIn, pOut, c1, c2, srcWidth);
350  return;
351  }
352 #endif
353  // C++ routine, both 16bpp and 32bpp.
354  // The loop is 2x unrolled and all common subexpressions and redundant
355  // assignments have been eliminated. 1 iteration generates 2 pixels.
356  mult1.setFactor32(c1);
357  mult3.setFactor32(c2);
358 
// f0 / f1 hold the c1-weighted left neighbour for the even / odd output
// pixel; both start as c1 * pIn[0] because the first pixel has no left
// neighbour.
359  Pixel p0 = pIn[0];
360  Pixel p1;
361  unsigned f0 = mult1.mul32(p0);
362  unsigned f1 = f0;
363 
364  unsigned x;
365  for (x = 0; x < (srcWidth - 2); x += 2) {
366  p1 = pIn[x + 1];
367  unsigned t0 = mult1.mul32(p1);
368  pOut[x] = mult1.conv32(f0 + mult3.mul32(p0) + t0);
369  f0 = t0;
370 
371  p0 = pIn[x + 2];
372  unsigned t1 = mult1.mul32(p0);
373  pOut[x + 1] = mult1.conv32(f1 + mult3.mul32(p1) + t1);
374  f1 = t1;
375  }
376 
// Tail: the last two pixels. The final pixel has no right neighbour, so its
// own c1-weighted value (t0) is used in that role.
377  p1 = pIn[x + 1];
378  unsigned t0 = mult1.mul32(p1);
379  pOut[x] = mult1.conv32(f0 + mult3.mul32(p0) + t0);
380 
381  pOut[x + 1] = mult1.conv32(f1 + mult3.mul32(p1) + t0);
382 }
383 
// Produces the destination line that sits between two already scaled lines
// in1 and in2. For any factor other than 255 the work is delegated to
// scanline.draw(); for factor == 255 only in1 is passed on to 'scale'
// (presumably a plain copy of in1 -- confirm).
//
// NOTE(review): the declarator line (listing line 385) and the declaration of
// 'scale' (listing line 392) are absent from this view.
384 template <class Pixel>
386  const Pixel* in1, const Pixel* in2, Pixel* out, int factor,
387  unsigned dstWidth)
388 {
389  if (factor != 255) {
390  scanline.draw(in1, in2, out, factor, dstWidth);
391  } else {
393  scale(in1, out, dstWidth);
394  }
395 }
396 
// Scales source lines to double width and double height. Each source line is
// blurred into an even destination line via blur1on2(); the odd destination
// line in between is produced by drawScanline() from the even lines above
// and below it.
//
// The final odd line needs a "line below" that is not part of the
// destination range, so one extra source line is read and blurred into a
// temporary buffer (buf2) instead of into dst.
//
// NOTE(review): the declarator line (listing line 398) is absent from this
// view; it presumably also declares the 'src' parameter used below.
// NOTE(review): 'dstEndY - 2' is computed on an unsigned, so this presumably
// assumes dstEndY >= 2 -- confirm against callers.
397 template <class Pixel>
399  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
400  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
401 {
402  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
403  int blur = settings.getBlurFactor();
404  int scanlineFactor = settings.getScanlineFactor();
405 
406  unsigned dstY = dstStartY;
407  auto* srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
408  auto* dstLine0 = dst.acquireLine(dstY + 0);
409  blur1on2(srcLine, dstLine0, blur, srcWidth);
410 
// dstLine0 is the most recently blurred even line; it stays acquired until
// the odd line below it has been drawn.
411  for (; dstY < dstEndY - 2; dstY += 2) {
412  srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
413  auto* dstLine2 = dst.acquireLine(dstY + 2);
414  blur1on2(srcLine, dstLine2, blur, srcWidth);
415 
416  auto* dstLine1 = dst.acquireLine(dstY + 1);
417  drawScanline(dstLine0, dstLine2, dstLine1, scanlineFactor,
418  2 * srcWidth);
419 
420  dst.releaseLine(dstY + 0, dstLine0);
421  dst.releaseLine(dstY + 1, dstLine1);
422  dstLine0 = dstLine2;
423  }
424 
// Last line pair: the line below is blurred into buf2, not into dst.
425  srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
426  VLA_SSE_ALIGNED(Pixel, buf2, 2 * srcWidth);
427  blur1on2(srcLine, buf2, blur, srcWidth);
428 
429  auto* dstLine1 = dst.acquireLine(dstY + 1);
430  drawScanline(dstLine0, buf2, dstLine1, scanlineFactor, 2 * srcWidth);
431  dst.releaseLine(dstY + 0, dstLine0);
432  dst.releaseLine(dstY + 1, dstLine1);
433 }
434 
// Scales source lines to double height at unchanged width. Each source line
// is blurred into an even destination line via blur1on1(); the odd
// destination line in between is produced by drawScanline() from the even
// lines above and below it.
//
// The final odd line needs a "line below" that is not part of the
// destination range, so one extra source line is read and blurred into a
// temporary buffer (buf2) instead of into dst.
//
// NOTE(review): the declarator line (listing line 436) is absent from this
// view; it presumably also declares the 'src' parameter used below.
// NOTE(review): 'dstEndY - 2' is computed on an unsigned, so this presumably
// assumes dstEndY >= 2 -- confirm against callers.
435 template <class Pixel>
437  unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
438  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
439 {
440  VLA_SSE_ALIGNED(Pixel, buf, srcWidth);
441  int blur = settings.getBlurFactor();
442  int scanlineFactor = settings.getScanlineFactor();
443 
444  unsigned dstY = dstStartY;
445  auto* srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
446  auto* dstLine0 = dst.acquireLine(dstY);
447  blur1on1(srcLine, dstLine0, blur, srcWidth);
448 
// dstLine0 is the most recently blurred even line; it stays acquired until
// the odd line below it has been drawn.
449  for (; dstY < dstEndY - 2; dstY += 2) {
450  srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
451  auto* dstLine2 = dst.acquireLine(dstY + 2);
452  blur1on1(srcLine, dstLine2, blur, srcWidth);
453 
454  auto* dstLine1 = dst.acquireLine(dstY + 1);
455  drawScanline(dstLine0, dstLine2, dstLine1, scanlineFactor,
456  srcWidth);
457 
458  dst.releaseLine(dstY + 0, dstLine0);
459  dst.releaseLine(dstY + 1, dstLine1);
460  dstLine0 = dstLine2;
461  }
462 
// Last line pair: the line below is blurred into buf2, not into dst.
463  srcLine = src.getLinePtr(srcStartY++, srcWidth, buf);
464  VLA_SSE_ALIGNED(Pixel, buf2, srcWidth);
465  blur1on1(srcLine, buf2, blur, srcWidth);
466 
467  auto* dstLine1 = dst.acquireLine(dstY + 1);
468  drawScanline(dstLine0, buf2, dstLine1, scanlineFactor, srcWidth);
469  dst.releaseLine(dstY + 0, dstLine0);
470  dst.releaseLine(dstY + 1, dstLine1);
471 }
472 
// Entry point for scaling a range of source lines into dst. Without a
// superimpose frame the request goes straight to dispatchScale(). With one,
// 'src' and '*superImpose' are first wrapped in a SuperImposedVideoFrame,
// srcWidth is replaced by that frame's width for line srcStartY, and the
// wrapped frame is scaled instead.
//
// NOTE(review): the declarator line (listing line 474) is absent from this
// view.
473 template <class Pixel>
475  FrameSource& src, const RawFrame* superImpose,
476  unsigned srcStartY, unsigned srcEndY, unsigned srcWidth,
477  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY)
478 {
479  if (superImpose) {
480  // Note: this implementation is different from the openGL
481  // version. Here we first alpha-blend and then scale, so the
482  // video layer will also get blurred (and possibly down-scaled
483  // to MSX resolution). The openGL version will only blur the
484  // MSX frame, then blend with the video frame and then apply
485  // scanlines. I think the openGL version is visually slightly
486  // better, but much more work to implement in software (in
487  // openGL shaders it's very easy). Maybe we can improve this
488  // later (if required at all).
489  SuperImposedVideoFrame<Pixel> sf(src, *superImpose, pixelOps);
490  srcWidth = sf.getLineWidth(srcStartY);
491  this->dispatchScale(sf, srcStartY, srcEndY, srcWidth,
492  dst, dstStartY, dstEndY);
493  } else {
494  this->dispatchScale(src, srcStartY, srcEndY, srcWidth,
495  dst, dstStartY, dstEndY);
496  }
497 }
498 
499 // Force template instantiation.
// The member functions are defined in this file, so the class is explicitly
// instantiated here for 16-bit and/or 32-bit pixels, each guarded by its
// HAVE_*BPP macro.
500 #if HAVE_16BPP
501 template class Simple2xScaler<uint16_t>;
502 #endif
503 #if HAVE_32BPP
504 template class Simple2xScaler<uint32_t>;
505 #endif
506 
507 } // namespace openmsx
This class represents a frame that is the (per-pixel) alpha-blend of a (laser-disc) video frame and a...
Scaler which assigns the color of the original pixel to all pixels in the 2x2 square.
const Pixel getLineColor(unsigned line) const
Get the (single) color of the given line.
Definition: FrameSource.hh:74
virtual void fillLine(unsigned y, Pixel color)=0
virtual Pixel * acquireLine(unsigned y)=0
Base class for 2x scalers.
Definition: Scaler2.hh:11
uint32_t Pixel
Interface for getting lines from a video frame.
Definition: FrameSource.hh:14
void dispatchScale(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY)
Definition: Scaler2.cc:225
mat4 scale(const vec3 &xyz)
Definition: gl_transform.hh:19
virtual unsigned getHeight() const =0
A video frame as output by the VDP scanline conversion unit, before any postprocessing filters are ap...
Definition: RawFrame.hh:25
int getBlurFactor() const
The amount of horizontal blur [0..256].
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
virtual void releaseLine(unsigned y, Pixel *buf)=0
Simple2xScaler(const PixelOperations< Pixel > &pixelOps, RenderSettings &renderSettings)
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:91
Class containing all settings for renderers.
unsigned getLineWidth(unsigned line) const override
Gets the number of display pixels on the given line.
int getScanlineFactor() const
The alpha value [0..255] of the gap between scanlines.
virtual unsigned getLineWidth(unsigned line) const =0
Gets the number of display pixels on the given line.
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
#define UNREACHABLE
Definition: unreachable.hh:35