21 #include <emmintrin.h>
30 template<std::
unsigned_
integral Pixel>
35 if (factor == 0.0f)
return;
42 if constexpr (
sizeof(
Pixel) == 4) {
53 scale[pixelOps.red (p)] = factor;
54 scale[pixelOps.green(p)] = factor;
55 scale[pixelOps.blue (p)] = factor;
58 scale[0] = (pixelOps.getMaxRed() / 255.0f) * factor;
59 scale[1] = (pixelOps.getMaxGreen() / 255.0f) * factor;
60 scale[2] = (pixelOps.getMaxBlue() / 255.0f) * factor;
65 std::normal_distribution<float> distribution(0.0f, 1.0f);
67 float r = distribution(generator);
// Adds signed per-byte noise to one line of 32-bit pixels, in place, using
// SSE2 intrinsics.
//   buf_  : pixel line to modify; asserted below to be 16-byte aligned.
//   noise : signed noise bytes, one per pixel byte; read with
//           _mm_load_si128, which requires a 16-byte aligned address.
//   width : line length in pixels; width * 4 must be a multiple of 64.
// NOTE(review): this listing omits lines (braces, the loop construct and
// anything between the numbered statements), so the comments below cover
// only the statements that are visible.
76 static inline void drawNoiseLineSse2(uint32_t* buf_,
signed char* noise,
size_t width)
// Line length in bytes.
91 ptrdiff_t
x = width *
sizeof(uint32_t);
// The visible statements handle 64 bytes (four 16-byte vectors) per pass,
// hence the multiple-of-64 requirement on the byte length.
92 assert((
x & 63) == 0);
// The pixel buffer is accessed with aligned loads/stores.
93 assert((uintptr_t(buf_) & 15) == 0);
// Both pointers are moved x bytes forward, i.e. one past the end of the data
// for this line.
// NOTE(review): the loads below add x again; presumably x is negated on a
// line not shown here so that it counts up towards 0 -- confirm against the
// full file.
95 char* buf =
reinterpret_cast<char*
>(buf_) +
x;
96 char* nse =
reinterpret_cast<char*
>(noise) +
x;
// 0x80 in every byte lane; XOR-ing with it toggles the top bit of each byte.
99 __m128i b7 = _mm_set1_epi8(-128);
// Load 64 bytes of pixel data (i0..i3) ...
101 __m128i i0 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf +
x + 0));
102 __m128i i1 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf +
x + 16));
103 __m128i i2 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf +
x + 32));
104 __m128i
i3 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf +
x + 48));
// ... and the 64 noise bytes at the same offset (n0..n3).
105 __m128i n0 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse +
x + 0));
106 __m128i n1 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse +
x + 16));
107 __m128i n2 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse +
x + 32));
108 __m128i
n3 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse +
x + 48));
// Per byte: toggle the top bit (maps unsigned 0..255 onto signed -128..127),
// add the noise with signed saturation, then toggle the top bit back. The
// net effect is pixel_byte + noise_byte clamped to 0..255.
109 __m128i o0 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(i0, b7), n0), b7);
110 __m128i o1 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(i1, b7), n1), b7);
111 __m128i o2 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(i2, b7), n2), b7);
112 __m128i
o3 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(
i3, b7),
n3), b7);
// Write the four result vectors back over the pixels they were computed from.
113 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf +
x + 0), o0);
114 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf +
x + 16), o1);
115 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf +
x + 32), o2);
116 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf +
x + 48),
o3);
// Step past the 64 bytes just processed.
117 x += 4 *
sizeof(__m128i);
// Scalar helper that combines a packed 32-bit pixel `p` with a packed 32-bit
// noise word `n` using only integer/bitwise operations on the whole word.
// NOTE(review): `s0`, `u1` and `o1` are defined on lines that are not part of
// this listing; the comments that depend on them are marked as assumptions.
127 static constexpr uint32_t addNoise4(uint32_t p, uint32_t n)
// Bits 8, 16 and 24 of p ^ n ^ s0. Presumably s0 is the plain 32-bit sum
// p + n, in which case these are the carries that crossed a byte boundary --
// TODO confirm against the full file.
135 uint32_t ci = (p ^ n ^ s0) & 0x01010100;
// Subtract those bits again so that each byte of `s` is unaffected by its
// lower neighbour.
136 uint32_t s = s0 - ci;
// Top bit of every byte in which p's top bit differs from both n's and s's.
149 uint32_t
t = (p ^ n) & (p ^ s) & 0x80808080;
// Assuming u1 only has bit 7 of each byte set, this widens every flagged
// byte to 0xFF (e.g. 0x80 -> 0x100 - 0x01 = 0xFF) and leaves other bytes 0.
155 uint32_t u8 = (u1 << 1) - (u1 >> 7);
// Same widening for o1.
158 uint32_t o8 = (o1 << 1) - (o1 >> 7);
// Bytes selected by u8 are forced to 0x00, bytes selected by o8 are forced to
// 0xFF, all others keep the value from `s`. (The names suggest per-byte
// underflow/overflow clamping -- confirm with the omitted lines.)
161 return (s & (~u8)) | o8;
// Adds noise to a single line of pixels, in place.
//   buf   : the pixel line.
//   noise : signed noise bytes; every visible path consumes 4 noise bytes per
//           pixel.
//   width : number of pixels in the line.
// NOTE(review): this listing omits lines (braces, `else` branches, any
// preprocessor guards, the declaration of `p`, ...); comments below describe
// only what is visible.
164 template<std::
unsigned_
integral Pixel>
165 void FBPostProcessor<Pixel>::drawNoiseLine(
166 Pixel* buf,
signed char* noise,
size_t width)
// 32-bit pixels, vectorised path: pass the whole line to drawNoiseLineSse2().
// NOTE(review): presumably wrapped in an SSE2 preprocessor check and followed
// by a return on lines not shown -- confirm.
169 if constexpr (
sizeof(
Pixel) == 4) {
172 auto* buf32 =
reinterpret_cast<uint32_t*
>(buf);
173 drawNoiseLineSse2(buf32, noise, width);
// 32-bit pixels, scalar path: view the noise as one 32-bit word per pixel and
// combine pixel and noise word with addNoise4().
178 if constexpr (
sizeof(
Pixel) == 4) {
180 auto* noise4 =
reinterpret_cast<uint32_t*
>(noise);
181 for (
auto i :
xrange(width)) {
182 buf[i] = addNoise4(buf[i], noise4[i]);
// Remaining statements: per-component path that goes through pixelOps.
// The per-channel maxima are fetched once, before the loop.
// NOTE(review): their use (presumably clamping r/g/b) is on lines not shown.
185 int mr = pixelOps.getMaxRed();
186 int mg = pixelOps.getMaxGreen();
187 int mb = pixelOps.getMaxBlue();
188 for (
auto i :
xrange(width)) {
// Split the pixel into its components (`p` is declared on an omitted line;
// presumably it is buf[i]).
190 int r = pixelOps.red(p);
191 int g = pixelOps.green(p);
192 int b = pixelOps.blue(p);
// Noise is laid out in groups of 4 bytes per pixel; only bytes 0..2 of each
// group are used on this path.
194 r += noise[4 * i + 0];
195 g += noise[4 * i + 1];
196 b += noise[4 * i + 2];
// Re-pack the adjusted components and store the pixel back.
202 buf[i] = pixelOps.combine(r,
g, b);
// Applies noise to every line of the given output surface by calling
// drawNoiseLine() once per line.
//   output_ : the surface to modify; it is checked_cast to SDLOutputSurface.
// (Braces and possibly other lines are missing from this listing.)
207 template<std::
unsigned_
integral Pixel>
208 void FBPostProcessor<Pixel>::drawNoise(OutputSurface& output_)
// Nothing to do when the noise setting is exactly 0.
210 if (renderSettings.getNoise() == 0.0f)
return;
212 auto& output = checked_cast<SDLOutputSurface&>(output_);
// w = pixels per line, h = number of lines (logical size of the surface).
213 auto [w, h] = output.getLogicalSize();
214 auto pixelAccess = output.getDirectPixelAccess();
215 for (
auto y :
xrange(h)) {
216 auto* buf = pixelAccess.getLinePtr<
Pixel>(y);
// Line y reads its noise from noiseBuf starting at offset noiseShift[y].
217 drawNoiseLine(buf, &noiseBuf[noiseShift[y]], w);
// Setting callback: when `setting` is the renderer's noise setting (compared
// by address), calls preCalcNoise() with that setting's current value.
// For any other setting the visible code does nothing.
// NOTE(review): presumably invoked by the settings framework when an attached
// setting changes -- the caller is not visible here.
// (Braces are missing from this listing.)
221 template<std::
unsigned_
integral Pixel>
222 void FBPostProcessor<Pixel>::update(
const Setting&
setting) noexcept
225 auto& noiseSetting = renderSettings.getNoiseSetting();
// Identity comparison, not value comparison.
226 if (&
setting == &noiseSetting) {
227 preCalcNoise(noiseSetting.getDouble());
232 template<std::
unsigned_
integral Pixel>
235 unsigned maxWidth_,
unsigned height_,
bool canDoInterlace_)
237 motherBoard_, display_, screen_, videoSource, maxWidth_, height_,
240 , scaleFactor(unsigned(-1))
241 , stretchWidth(unsigned(-1))
242 , noiseShift(screen.getLogicalHeight())
243 , pixelOps(screen.getPixelFormat())
246 noiseSetting.
attach(*
this);
247 preCalcNoise(noiseSetting.getDouble());
251 template<std::
unsigned_
integral Pixel>
254 renderSettings.getNoiseSetting().detach(*
this);
257 template<std::
unsigned_
integral Pixel>
260 auto& output = checked_cast<SDLOutputSurface&>(output_);
261 if (renderSettings.getInterleaveBlackFrame()) {
262 interleaveCount ^= 1;
263 if (interleaveCount) {
264 output.clearScreen();
269 if (!paintFrame)
return;
272 auto algo = renderSettings.getScaleAlgorithm();
273 unsigned factor = renderSettings.getScaleFactor();
274 unsigned inWidth = lrintf(renderSettings.getHorizontalStretch());
275 if ((scaleAlgorithm != algo) || (scaleFactor != factor) ||
276 (inWidth != stretchWidth) || (lastOutput != &output)) {
277 scaleAlgorithm = algo;
278 scaleFactor = factor;
279 stretchWidth = inWidth;
280 lastOutput = &output;
285 output, pixelOps, inWidth);
289 const unsigned srcHeight = paintFrame->getHeight();
290 const unsigned dstHeight = output.getLogicalHeight();
292 unsigned g = std::gcd(srcHeight, dstHeight);
293 unsigned srcStep = srcHeight /
g;
294 unsigned dstStep = dstHeight /
g;
298 unsigned srcStartY = 0;
299 unsigned dstStartY = 0;
300 while (dstStartY < dstHeight) {
303 assert(srcStartY < srcHeight);
306 unsigned lineWidth = getLineWidth(paintFrame, srcStartY, srcStep);
307 unsigned srcEndY = srcStartY + srcStep;
308 unsigned dstEndY = dstStartY + dstStep;
309 while ((srcEndY < srcHeight) && (dstEndY < dstHeight) &&
310 (getLineWidth(paintFrame, srcEndY, srcStep) == lineWidth)) {
318 currScaler->scaleImage(
319 *paintFrame, superImposeVideoFrame,
320 srcStartY, srcEndY, lineWidth,
321 *stretchScaler, dstStartY, dstEndY);
330 output.flushFrameBuffer();
333 template<std::
unsigned_
integral Pixel>
335 std::unique_ptr<RawFrame> finishedFrame, EmuTime::param time)
338 std::uniform_int_distribution<int> distribution(0,
NOISE_SHIFT / 16 - 1);
339 for (
auto y :
xrange(screen.getLogicalHeight())) {
340 noiseShift[y] = distribution(generator) * 16;
Represents the output window/screen of openMSX.
FBPostProcessor(MSXMotherBoard &motherBoard, Display &display, OutputSurface &screen, const std::string &videoSource, unsigned maxWidth, unsigned height, bool canDoInterlace)
~FBPostProcessor() override
void paint(OutputSurface &output) override
Paint this layer.
std::unique_ptr< RawFrame > rotateFrames(std::unique_ptr< RawFrame > finishedFrame, EmuTime::param time) override
Sets up the "abcdFrame" variables for a new frame.
A frame buffer where pixels can be written to.
int getLogicalWidth() const
Abstract base class for post processors.
virtual std::unique_ptr< RawFrame > rotateFrames(std::unique_ptr< RawFrame > finishedFrame, EmuTime::param time)
Sets up the "abcdFrame" variables for a new frame.
RenderSettings & renderSettings
Render settings.
OutputSurface & screen
The surface which is visible to the user.
Class containing all settings for renderers.
FloatSetting & getNoiseSetting()
The amount of noise to add to the frame.
static std::unique_ptr< Scaler< Pixel > > createScaler(const PixelOperations< Pixel > &pixelOps, RenderSettings &renderSettings)
Instantiates a Scaler.
void attach(Observer< T > &observer)
void update(const Setting &setting) noexcept override
mat3 n3(vec3(1, 0, 3), vec3(4, 5, 6), vec3(7, 8, 9))
mat3 o3(vec3(1, 2, 3), vec3(4, 5, 0), vec3(7, 8, 9))
imat3 i3(ivec3(1, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
constexpr vecN< N, T > clamp(const vecN< N, T > &x, const vecN< N, T > &minVal, const vecN< N, T > &maxVal)
constexpr mat4 scale(const vec3 &xyz)
This file implemented 3 utility functions:
constexpr unsigned NOISE_SHIFT
constexpr unsigned NOISE_BUF_SIZE
constexpr KeyMatrixPosition x
Keyboard bindings.
auto & global_urng()
Return reference to a (shared) global random number generator.
static std::unique_ptr< ScalerOutput< Pixel > > create(SDLOutputSurface &output, PixelOperations< Pixel > pixelOps, unsigned inWidth)
constexpr auto xrange(T e)