openMSX
Deflicker.cc
Go to the documentation of this file.
1 #include "Deflicker.hh"
2 #include "RawFrame.hh"
3 #include "PixelOperations.hh"
4 #include "unreachable.hh"
5 #include "vla.hh"
6 #include "build-info.hh"
7 #include <memory>
8 #ifdef __SSE2__
9 #include <emmintrin.h>
10 #endif
11 
12 namespace openmsx {
13 
14 template<typename Pixel> class DeflickerImpl final : public Deflicker
15 {
16 public:
17  DeflickerImpl(const SDL_PixelFormat& format,
18  std::unique_ptr<RawFrame>* lastFrames);
19 
20 private:
21  const void* getLineInfo(
22  unsigned line, unsigned& width,
23  void* buf, unsigned bufWidth) const override;
24 
25  PixelOperations<Pixel> pixelOps;
26 };
27 
28 
29 std::unique_ptr<Deflicker> Deflicker::create(
30  const SDL_PixelFormat& format,
31  std::unique_ptr<RawFrame>* lastFrames)
32 {
33 #if HAVE_16BPP
34  if (format.BitsPerPixel == 15 || format.BitsPerPixel == 16) {
35  return std::make_unique<DeflickerImpl<uint16_t>>(format, lastFrames);
36  }
37 #endif
38 #if HAVE_32BPP
39  if (format.BitsPerPixel == 32) {
40  return std::make_unique<DeflickerImpl<uint32_t>>(format, lastFrames);
41  }
42 #endif
43  UNREACHABLE; return nullptr; // avoid warning
44 }
45 
46 
47 Deflicker::Deflicker(const SDL_PixelFormat& format,
48  std::unique_ptr<RawFrame>* lastFrames_)
49  : FrameSource(format)
50  , lastFrames(lastFrames_)
51 {
52 }
53 
55 {
58 }
59 
60 unsigned Deflicker::getLineWidth(unsigned line) const
61 {
62  return lastFrames[0]->getLineWidthDirect(line);
63 }
64 
65 
66 template<typename Pixel>
67 DeflickerImpl<Pixel>::DeflickerImpl(const SDL_PixelFormat& format,
68  std::unique_ptr<RawFrame>* lastFrames_)
69  : Deflicker(format, lastFrames_)
70  , pixelOps(format)
71 {
72 }
73 
74 #ifdef __SSE2__
// Blend two SSE registers full of pixels, pixel by pixel.
//
// 4-byte pixels: plain per-byte average (_mm_avg_epu8); 'blendMask'
// is not used in this case.
// Other (2-byte) pixels: per 16-bit lane
//     (x & y) + (((x ^ y) & blendMask) >> 1)
// where 'blendMask' is replicated over all eight lanes.
template<typename Pixel>
static __m128i blend(__m128i x, __m128i y, Pixel blendMask)
{
	if (sizeof(Pixel) != 4) {
		// 16bpp
		const __m128i mask     = _mm_set1_epi16(blendMask);
		const __m128i common   = _mm_and_si128(x, y);
		const __m128i halfDiff = _mm_srli_epi16(
			_mm_and_si128(_mm_xor_si128(x, y), mask), 1);
		return _mm_add_epi16(common, halfDiff);
	}
	// 32bpp
	return _mm_avg_epu8(x, y);
}
91 
// Unaligned 16-byte load from the address that lies 'byteOffst' BYTES
// (not pixels) away from 'ptr'. The offset may be negative.
template<typename Pixel>
static __m128i uload(const Pixel* ptr, ptrdiff_t byteOffst)
{
	const char* addr = reinterpret_cast<const char*>(ptr) + byteOffst;
	return _mm_loadu_si128(reinterpret_cast<const __m128i*>(addr));
}
99 
// Unaligned 16-byte store of 'val' to the address that lies
// 'byteOffst' BYTES (not pixels) away from 'ptr'. The offset may be
// negative.
template<typename Pixel>
static void ustore(Pixel* ptr, ptrdiff_t byteOffst, __m128i val)
{
	char* addr = reinterpret_cast<char*>(ptr) + byteOffst;
	_mm_storeu_si128(reinterpret_cast<__m128i*>(addr), val);
}
107 
// Pixel-wise equality test of two SSE registers. The lane width
// follows sizeof(Pixel): 32-bit lanes for 4-byte pixels, 16-bit lanes
// for 2-byte pixels. The result is the mask produced by the
// corresponding _mm_cmpeq_epi32 / _mm_cmpeq_epi16 instruction.
template<typename Pixel>
static __m128i compare(__m128i x, __m128i y)
{
	static_assert(sizeof(Pixel) == 4 || sizeof(Pixel) == 2, "");
	return (sizeof(Pixel) == 4) ? _mm_cmpeq_epi32(x, y)
	                            : _mm_cmpeq_epi16(x, y);
}
118 #endif
119 
120 template<typename Pixel>
122  unsigned line, unsigned& width, void* buf_, unsigned bufWidth) const
123 {
124  unsigned width0 = lastFrames[0]->getLineWidthDirect(line);
125  unsigned width1 = lastFrames[1]->getLineWidthDirect(line);
126  unsigned width2 = lastFrames[2]->getLineWidthDirect(line);
127  unsigned width3 = lastFrames[3]->getLineWidthDirect(line);
128  const Pixel* line0 = lastFrames[0]->template getLinePtrDirect<Pixel>(line);
129  const Pixel* line1 = lastFrames[1]->template getLinePtrDirect<Pixel>(line);
130  const Pixel* line2 = lastFrames[2]->template getLinePtrDirect<Pixel>(line);
131  const Pixel* line3 = lastFrames[3]->template getLinePtrDirect<Pixel>(line);
132  if ((width0 != width3) || (width0 != width2) || (width0 != width1)) {
133  // Not all the same width.
134  width = width0;
135  return line0;
136  }
137 
138  // Prefer to write directly to the output buffer, if that's not
139  // possible store the intermediate result in a temp buffer.
140  VLA_SSE_ALIGNED(Pixel, buf2, width0);
141  auto* buf = static_cast<Pixel*>(buf_);
142  Pixel* out = (width0 <= bufWidth) ? buf : buf2;
143 
144  // Detect pixels that alternate between two different color values and
145  // replace those with the average color. We search for an alternating
146  // sequence with length (at least) 4. Or IOW we look for "A B A B".
147  // The implementation below also detects a constant pixel value
148  // "A A A A" as alternating between "A" and "A", but that's fine.
149  Pixel* dst = out;
150  unsigned remaining = width0;
151 #ifdef __SSE2__
152  size_t pixelsPerSSE = sizeof(__m128i) / sizeof(Pixel);
153  size_t widthSSE = remaining & ~(pixelsPerSSE - 1); // rounded down to a multiple of pixels in a SSE register
154  line0 += widthSSE;
155  line1 += widthSSE;
156  line2 += widthSSE;
157  line3 += widthSSE;
158  dst += widthSSE;
159  auto byteOffst = -ptrdiff_t(widthSSE * sizeof(Pixel));
160 
161  Pixel blendMask = pixelOps.getBlendMask();
162  while (byteOffst < 0) {
163  __m128i a0 = uload(line0, byteOffst);
164  __m128i a1 = uload(line1, byteOffst);
165  __m128i a2 = uload(line2, byteOffst);
166  __m128i a3 = uload(line3, byteOffst);
167 
168  __m128i e02 = compare<Pixel>(a0, a2); // a0 == a2
169  __m128i e13 = compare<Pixel>(a1, a3); // a1 == a3
170  __m128i cnd = _mm_and_si128(e02, e13); // (a0==a2) && (a1==a3)
171 
172  __m128i a01 = blend(a0, a1, blendMask);
173  __m128i p = _mm_xor_si128(a0, a01);
174  __m128i q = _mm_and_si128(p, cnd);
175  __m128i r = _mm_xor_si128(q, a0); // select(a0, a01, cnd)
176 
177  ustore(dst, byteOffst, r);
178  byteOffst += sizeof(__m128i);
179  }
180  remaining &= pixelsPerSSE - 1;
181 #endif
182  for (unsigned x = 0; x < remaining; ++x) {
183  dst[x] = ((line0[x] == line2[x]) && (line1[x] == line3[x]))
184  ? pixelOps.template blend<1, 1>(line0[x], line1[x])
185  : line0[x];
186  }
187 
188  if (width0 <= bufWidth) {
189  // It it already fits, we're done
190  width = width0;
191  } else {
192  // Otherwise scale so that it does fit.
193  width = bufWidth;
194  scaleLine(out, buf, width0, bufWidth);
195  }
196  return buf;
197 }
198 
199 } // namespace openmsx
void init(FieldType fieldType_)
(Re)initialize an existing FrameSource.
Definition: FrameSource.hh:34
Deflicker(const SDL_PixelFormat &format, std::unique_ptr< RawFrame > *lastFrames)
Definition: Deflicker.cc:47
static std::unique_ptr< Deflicker > create(const SDL_PixelFormat &format, std::unique_ptr< RawFrame > *lastFrames)
Definition: Deflicker.cc:29
uint32_t Pixel
Interface for getting lines from a video frame.
Definition: FrameSource.hh:14
unsigned getLineWidth(unsigned line) const override
Gets the number of display pixels on the given line.
Definition: Deflicker.cc:60
DeflickerImpl(const SDL_PixelFormat &format, std::unique_ptr< RawFrame > *lastFrames)
Definition: Deflicker.cc:67
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
void scaleLine(const Pixel *in, Pixel *out, unsigned inWidth, unsigned outWidth) const
Definition: FrameSource.cc:70
Interlacing is off for this frame.
Definition: FrameSource.hh:22
void format(SectorAccessibleDisk &disk, bool dos1)
Format the given disk (= a single partition).
void setHeight(unsigned height_)
Definition: FrameSource.hh:198
std::unique_ptr< RawFrame > * lastFrames
Definition: Deflicker.hh:27
unsigned getHeight() const
Gets the number of lines in this frame.
Definition: FrameSource.hh:44
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
#define UNREACHABLE
Definition: unreachable.hh:38