openMSX
Deflicker.cc
Go to the documentation of this file.
1 #include "Deflicker.hh"
2 #include "RawFrame.hh"
3 #include "PixelOperations.hh"
4 #include "one_of.hh"
5 #include "unreachable.hh"
6 #include "vla.hh"
7 #include "xrange.hh"
8 #include "build-info.hh"
9 #include <memory>
10 #ifdef __SSE2__
11 #include <emmintrin.h>
12 #endif
13 
14 namespace openmsx {
15 
16 template<typename Pixel> class DeflickerImpl final : public Deflicker
17 {
18 public:
20  std::unique_ptr<RawFrame>* lastFrames);
21 
22 private:
23  [[nodiscard]] const void* getLineInfo(
24  unsigned line, unsigned& width,
25  void* buf, unsigned bufWidth) const override;
26 
27 private:
28  PixelOperations<Pixel> pixelOps;
29 };
30 
31 
32 std::unique_ptr<Deflicker> Deflicker::create(
33  const PixelFormat& format,
34  std::unique_ptr<RawFrame>* lastFrames)
35 {
36 #if HAVE_16BPP
37  if (format.getBytesPerPixel() == 2) {
38  return std::make_unique<DeflickerImpl<uint16_t>>(format, lastFrames);
39  }
40 #endif
41 #if HAVE_32BPP
42  if (format.getBytesPerPixel() == 4) {
43  return std::make_unique<DeflickerImpl<uint32_t>>(format, lastFrames);
44  }
45 #endif
46  UNREACHABLE; return nullptr; // avoid warning
47 }
48 
49 
51  std::unique_ptr<RawFrame>* lastFrames_)
53  , lastFrames(lastFrames_)
54 {
55 }
56 
58 {
61 }
62 
63 unsigned Deflicker::getLineWidth(unsigned line) const
64 {
65  return lastFrames[0]->getLineWidthDirect(line);
66 }
67 
68 
69 template<typename Pixel>
71  std::unique_ptr<RawFrame>* lastFrames_)
72  : Deflicker(format, lastFrames_)
73  , pixelOps(format)
74 {
75 }
76 
77 #ifdef __SSE2__
/**
 * Blend two SSE registers full of packed pixels with equal weights.
 *
 * @param x         First group of packed pixels.
 * @param y         Second group of packed pixels.
 * @param blendMask Only used for 16bpp pixels: mask that is applied to
 *                  (x ^ y) before that value is shifted right by one.
 *                  Ignored for 32bpp pixels.
 * @return For 32bpp: the per-byte rounded-up average of x and y.
 *         For 16bpp: (x & y) + (((x ^ y) & blendMask) >> 1).
 */
template<typename Pixel>
static __m128i blend(__m128i x, __m128i y, [[maybe_unused]] Pixel blendMask)
{
	static_assert(sizeof(Pixel) == 2 || sizeof(Pixel) == 4,
	              "only 16bpp and 32bpp pixels are supported");
	// The pixel size is a compile-time property, so select the variant at
	// compile time. This way the 16bpp code (including the narrowing of
	// blendMask to 16 bits) is never instantiated for 32bpp pixels.
	if constexpr (sizeof(Pixel) == 4) {
		// 32bpp
		return _mm_avg_epu8(x, y);
	} else {
		// 16bpp, (x & y) + (((x ^ y) & blendMask) >> 1)
		const __m128i m = _mm_set1_epi16(static_cast<short>(blendMask));
		const __m128i a = _mm_and_si128(x, y);
		const __m128i b = _mm_xor_si128(x, y);
		const __m128i c = _mm_and_si128(b, m);
		const __m128i d = _mm_srli_epi16(c, 1);
		return _mm_add_epi16(a, d);
	}
}
94 
/**
 * Unaligned 128-bit load.
 *
 * @param ptr       Base pointer.
 * @param byteOffst Offset relative to 'ptr', expressed in bytes (not in
 *                  pixels), may be negative.
 * @return The 16 bytes located at address 'ptr + byteOffst'.
 */
template<typename Pixel>
static __m128i uload(const Pixel* ptr, ptrdiff_t byteOffst)
{
	const char* addr = reinterpret_cast<const char*>(ptr) + byteOffst;
	return _mm_loadu_si128(reinterpret_cast<const __m128i*>(addr));
}
102 
/**
 * Unaligned 128-bit store.
 *
 * @param ptr       Base pointer.
 * @param byteOffst Offset relative to 'ptr', expressed in bytes (not in
 *                  pixels), may be negative.
 * @param val       The 16 bytes to write to address 'ptr + byteOffst'.
 */
template<typename Pixel>
static void ustore(Pixel* ptr, ptrdiff_t byteOffst, __m128i val)
{
	char* addr = reinterpret_cast<char*>(ptr) + byteOffst;
	_mm_storeu_si128(reinterpret_cast<__m128i*>(addr), val);
}
110 
/**
 * Per-pixel equality test on two SSE registers full of packed pixels.
 *
 * @param x First group of packed pixels.
 * @param y Second group of packed pixels.
 * @return A mask with all bits set in every pixel-sized lane where x and
 *         y are equal, and all bits cleared in the other lanes.
 */
template<typename Pixel>
static __m128i compare(__m128i x, __m128i y)
{
	static_assert(sizeof(Pixel) == 2 || sizeof(Pixel) == 4,
	              "only 16bpp and 32bpp pixels are supported");
	// The lane width follows from the pixel type, pick it at compile time.
	if constexpr (sizeof(Pixel) == 4) {
		return _mm_cmpeq_epi32(x, y);
	} else {
		return _mm_cmpeq_epi16(x, y);
	}
}
121 #endif
122 
123 template<typename Pixel>
124 const void* DeflickerImpl<Pixel>::getLineInfo(
125  unsigned line, unsigned& width, void* buf_, unsigned bufWidth) const
126 {
127  unsigned width0 = lastFrames[0]->getLineWidthDirect(line);
128  unsigned width1 = lastFrames[1]->getLineWidthDirect(line);
129  unsigned width2 = lastFrames[2]->getLineWidthDirect(line);
130  unsigned width3 = lastFrames[3]->getLineWidthDirect(line);
131  const Pixel* line0 = lastFrames[0]->template getLinePtrDirect<Pixel>(line);
132  const Pixel* line1 = lastFrames[1]->template getLinePtrDirect<Pixel>(line);
133  const Pixel* line2 = lastFrames[2]->template getLinePtrDirect<Pixel>(line);
134  const Pixel* line3 = lastFrames[3]->template getLinePtrDirect<Pixel>(line);
135  if ((width0 != width3) || (width0 != width2) || (width0 != width1)) {
136  // Not all the same width.
137  width = width0;
138  return line0;
139  }
140 
141  // Prefer to write directly to the output buffer, if that's not
142  // possible store the intermediate result in a temp buffer.
143  VLA_SSE_ALIGNED(Pixel, buf2, width0);
144  auto* buf = static_cast<Pixel*>(buf_);
145  Pixel* out = (width0 <= bufWidth) ? buf : buf2;
146 
147  // Detect pixels that alternate between two different color values and
148  // replace those with the average color. We search for an alternating
149  // sequence with length (at least) 4. Or IOW we look for "A B A B".
150  // The implementation below also detects a constant pixel value
151  // "A A A A" as alternating between "A" and "A", but that's fine.
152  Pixel* dst = out;
153  unsigned remaining = width0;
154 #ifdef __SSE2__
155  size_t pixelsPerSSE = sizeof(__m128i) / sizeof(Pixel);
156  size_t widthSSE = remaining & ~(pixelsPerSSE - 1); // rounded down to a multiple of pixels in a SSE register
157  line0 += widthSSE;
158  line1 += widthSSE;
159  line2 += widthSSE;
160  line3 += widthSSE;
161  dst += widthSSE;
162  auto byteOffst = -ptrdiff_t(widthSSE * sizeof(Pixel));
163 
164  Pixel blendMask = pixelOps.getBlendMask();
165  while (byteOffst < 0) {
166  __m128i a0 = uload(line0, byteOffst);
167  __m128i a1 = uload(line1, byteOffst);
168  __m128i a2 = uload(line2, byteOffst);
169  __m128i a3 = uload(line3, byteOffst);
170 
171  __m128i e02 = compare<Pixel>(a0, a2); // a0 == a2
172  __m128i e13 = compare<Pixel>(a1, a3); // a1 == a3
173  __m128i cnd = _mm_and_si128(e02, e13); // (a0==a2) && (a1==a3)
174 
175  __m128i a01 = blend(a0, a1, blendMask);
176  __m128i p = _mm_xor_si128(a0, a01);
177  __m128i q = _mm_and_si128(p, cnd);
178  __m128i r = _mm_xor_si128(q, a0); // select(a0, a01, cnd)
179 
180  ustore(dst, byteOffst, r);
181  byteOffst += sizeof(__m128i);
182  }
183  remaining &= pixelsPerSSE - 1;
184 #endif
185  for (auto x : xrange(remaining)) {
186  dst[x] = ((line0[x] == line2[x]) && (line1[x] == line3[x]))
187  ? pixelOps.template blend<1, 1>(line0[x], line1[x])
188  : line0[x];
189  }
190 
191  if (width0 <= bufWidth) {
192  // It it already fits, we're done
193  width = width0;
194  } else {
195  // Otherwise scale so that it does fit.
196  width = bufWidth;
197  scaleLine(out, buf, width0, bufWidth);
198  }
199  return buf;
200 }
201 
202 } // namespace openmsx
Definition: one_of.hh:7
DeflickerImpl(const PixelFormat &format, std::unique_ptr< RawFrame > *lastFrames)
Definition: Deflicker.cc:70
static std::unique_ptr< Deflicker > create(const PixelFormat &format, std::unique_ptr< RawFrame > *lastFrames)
Definition: Deflicker.cc:32
std::unique_ptr< RawFrame > * lastFrames
Definition: Deflicker.hh:28
Deflicker(const PixelFormat &format, std::unique_ptr< RawFrame > *lastFrames)
Definition: Deflicker.cc:50
unsigned getLineWidth(unsigned line) const override
Gets the number of display pixels on the given line.
Definition: Deflicker.cc:63
Interface for getting lines from a video frame.
Definition: FrameSource.hh:15
void setHeight(unsigned height_)
Definition: FrameSource.hh:198
void init(FieldType fieldType_)
(Re)initialize an existing FrameSource.
Definition: FrameSource.hh:34
unsigned getHeight() const
Gets the number of lines in this frame.
Definition: FrameSource.hh:44
@ FIELD_NONINTERLACED
Interlacing is off for this frame.
Definition: FrameSource.hh:22
void format(SectorAccessibleDisk &disk, bool dos1)
Format the given disk (= a single partition).
This file implemented 3 utility functions:
Definition: Autofire.cc:5
uint32_t Pixel
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:124
#define UNREACHABLE
Definition: unreachable.hh:38
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
constexpr auto xrange(T e)
Definition: xrange.hh:155