// Size constant for the noise table: noiseBuf below holds 2 * NOISE_SHIFT
// entries, and rotateFrames() picks per-row start offsets below NOISE_SHIFT.
28static constexpr unsigned NOISE_SHIFT = 8192;
// File-local table of signed 8-bit noise values, filled by preCalcNoise().
// NOTE(review): ALIGNAS_SSE is presumably a project macro requesting the
// alignment needed by the aligned SSE loads in drawNoiseLineSse2 -- confirm.
29ALIGNAS_SSE static std::array<int8_t, 2 * NOISE_SHIFT> noiseBuf;
// Fills the file-level noiseBuf with pre-computed noise scaled by 'factor'.
// Returns without touching the buffer when 'factor' is exactly 0.
// NOTE(review): this listing omits several original lines (braces, the
// definitions of 'p' and 'generator', the assignment of scale[3]); remarks
// about those parts are assumptions to confirm against the full file.
31template<std::
unsigned_
integral Pixel>
32void FBPostProcessor<Pixel>::preCalcNoise(
float factor)
36 if (factor == 0.0f)
return;
// Per-byte scale factors: scale[k] is applied to byte k of every group of
// 4 consecutive noiseBuf entries (see the loop below).
42 std::array<float, 4>
scale;
43 if constexpr (
sizeof(
Pixel) == 4) {
// 4-byte pixels: the positions reported by pixelOps.red/green/blue for 'p'
// receive the full factor ('p' is defined on a line not shown here).
54 scale[pixelOps.red (p)] = factor;
55 scale[pixelOps.green(p)] = factor;
56 scale[pixelOps.blue (p)] = factor;
// Presumably the branch for other pixel sizes (the separating 'else' is not
// visible in this listing): the factor is rescaled by each channel's
// maximum value relative to 255.
59 scale[0] = (narrow_cast<float>(pixelOps.getMaxRed()) / 255.0f) * factor;
60 scale[1] = (narrow_cast<float>(pixelOps.getMaxGreen()) / 255.0f) * factor;
61 scale[2] = (narrow_cast<float>(pixelOps.getMaxBlue()) / 255.0f) * factor;
// Normal distribution with mean 0 and standard deviation 1. One sample 'r'
// is drawn per group of 4 entries, so all 4 bytes of a group derive from
// the same random value and differ only by their scale.
66 std::normal_distribution<float> distribution(0.0f, 1.0f);
67 for (
unsigned i = 0; i < noiseBuf.size(); i += 4) {
68 float r = distribution(generator);
// Scale, round to nearest and clamp to the int8_t range before storing.
69 noiseBuf[i + 0] = narrow<int8_t>(
std::clamp(
int(roundf(r *
scale[0])), -128, 127));
70 noiseBuf[i + 1] = narrow<int8_t>(
std::clamp(
int(roundf(r *
scale[1])), -128, 127));
71 noiseBuf[i + 2] = narrow<int8_t>(
std::clamp(
int(roundf(r *
scale[2])), -128, 127));
72 noiseBuf[i + 3] = narrow<int8_t>(
std::clamp(
int(roundf(r *
scale[3])), -128, 127));
// Adds signed per-byte noise to one line of 32-bit pixels, in place, using
// SSE2 intrinsics and processing 64 bytes (4 x __m128i) per step.
//   buf_  : start of the pixel line; asserted to be 16-byte aligned.
//   noise : start of the noise bytes; read with aligned 16-byte loads.
//   width : number of 32-bit pixels; width * 4 must be a multiple of 64.
// NOTE(review): the loop construct and any adjustment of 'x' between the
// pointer setup and the loads are not visible in this listing.
77static inline void drawNoiseLineSse2(uint32_t* buf_,
signed char* noise,
size_t width)
// x = length of the line in bytes.
92 auto x = narrow<ptrdiff_t>(width *
sizeof(uint32_t));
93 assert((x & 63) == 0);
94 assert((uintptr_t(buf_) & 15) == 0);
// Both pointers are advanced by x bytes, i.e. they point at the end of the
// line; the loads/stores below address memory as (pointer + x + offset).
96 char* buf =
reinterpret_cast<char*
>(buf_) + x;
97 char* nse =
reinterpret_cast<char*
>(noise) + x;
// Every byte of b7 is 0x80 (top bit set); XOR-ing with it converts a byte
// between its unsigned value and the same value biased by -128.
100 __m128i b7 = _mm_set1_epi8(-128);
// Load 64 bytes of pixel data (i0..i3) ...
102 __m128i i0 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf + x + 0));
103 __m128i i1 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf + x + 16));
104 __m128i i2 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf + x + 32));
105 __m128i
i3 = _mm_load_si128(
reinterpret_cast<__m128i*
>(buf + x + 48));
// ... and the matching 64 bytes of noise (n0..n3).
106 __m128i n0 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse + x + 0));
107 __m128i n1 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse + x + 16));
108 __m128i n2 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse + x + 32));
109 __m128i
n3 = _mm_load_si128(
reinterpret_cast<__m128i*
>(nse + x + 48));
// Flip the top bit, add the noise with signed saturation, flip the top bit
// back. Net effect per byte: unsigned pixel value plus signed noise value,
// saturating at 0 and 255 instead of wrapping around.
110 __m128i o0 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(i0, b7), n0), b7);
111 __m128i o1 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(i1, b7), n1), b7);
112 __m128i o2 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(i2, b7), n2), b7);
113 __m128i
o3 = _mm_xor_si128(_mm_adds_epi8(_mm_xor_si128(
i3, b7),
n3), b7);
// Write the 64 result bytes back over the pixel data they were read from.
114 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf + x + 0), o0);
115 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf + x + 16), o1);
116 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf + x + 32), o2);
117 _mm_store_si128(
reinterpret_cast<__m128i*
>(buf + x + 48),
o3);
// Advance the byte offset by the 64 bytes just processed.
118 x += 4 *
sizeof(__m128i);
// Combines pixel word 'p' with noise word 'n' using only 32-bit integer
// operations. The masks 0x01010100 and 0x80808080 indicate that both words
// are handled as four independent bytes.
// NOTE(review): the lines defining 's0', 'u1' and 'o1' are not visible in
// this listing; remarks that depend on them are assumptions to confirm.
128static constexpr uint32_t addNoise4(uint32_t p, uint32_t n)
// ci keeps bits 8, 16 and 24 only, i.e. the lowest bit of bytes 1..3.
// Presumably (if s0 is the plain sum p + n) these are the carries that
// crossed from one byte into the next.
136 uint32_t ci = (p ^ n ^ s0) & 0x01010100;
// Remove those bits from s0 again.
137 uint32_t s = s0 - ci;
// t keeps bit 7 of each byte where p and n differ in bit 7 AND p and s
// differ in bit 7.
150 uint32_t
t = (p ^ n) & (p ^ s) & 0x80808080;
// If u1 / o1 contain only bit 7 of a byte (0x80), these expressions widen
// that bit to a full 0xFF byte mask: (0x80 << 1) - (0x80 >> 7) = 0xFF.
156 uint32_t u8 = (u1 << 1) - (u1 >> 7);
159 uint32_t o8 = (o1 << 1) - (o1 >> 7);
// Bits selected by u8 are cleared in s, then bits selected by o8 are set.
162 return (s & (~u8)) | o8;
// Adds the noise values starting at 'noise' to the pixels in 'buf', in place.
// Three code paths are visible: an SSE2 call and a scalar 32-bit-word path
// for 4-byte pixels, and a per-channel path for the rest.
// NOTE(review): braces, any preprocessor guards/returns separating the
// paths, the definition of 'p' and any clamping of r/g/b are not visible in
// this listing.
165template<std::
unsigned_
integral Pixel>
166void FBPostProcessor<Pixel>::drawNoiseLine(
167 std::span<Pixel> buf,
signed char* noise)
// Number of pixels in the line.
169 auto width = buf.
size();
// 4-byte pixels: hand the whole line to the SSE2 routine.
// NOTE(review): presumably only compiled when SSE2 is available -- confirm.
171 if constexpr (
sizeof(
Pixel) == 4) {
174 auto* buf32 =
reinterpret_cast<uint32_t*
>(buf.data());
175 drawNoiseLineSse2(buf32, noise, width);
// 4-byte pixels, scalar path: view the noise bytes as 32-bit words and
// combine one noise word with each pixel via addNoise4.
180 if constexpr (
sizeof(
Pixel) == 4) {
182 auto* noise4 =
reinterpret_cast<uint32_t*
>(noise);
183 for (
auto i :
xrange(width)) {
184 buf[i] = addNoise4(buf[i], noise4[i]);
// Per-channel path (presumably the branch for other pixel sizes).
// mr/mg/mb are the channel maxima; their use is on lines not shown here,
// presumably to clamp r/g/b after the addition.
187 int mr = pixelOps.getMaxRed();
188 int mg = pixelOps.getMaxGreen();
189 int mb = pixelOps.getMaxBlue();
190 for (
auto i :
xrange(width)) {
// Split pixel 'p' (defined on a line not shown) into its channels.
192 int r = pixelOps.red(p);
193 int g = pixelOps.green(p);
194 int b = pixelOps.blue(p);
// Each pixel owns a group of 4 noise bytes; bytes 0..2 are added to
// red/green/blue (byte 3 is not read in the visible code).
196 r += noise[4 * i + 0];
197 g += noise[4 * i + 1];
198 b += noise[4 * i + 2];
// Re-pack the channels into a pixel and store it back.
204 buf[i] = pixelOps.combine(r,
g, b);
// Applies noise to every row of the given output surface. Does nothing when
// the noise render setting is exactly 0.
// 'output_' must actually be an SDLOutputSurface (checked_cast).
209template<std::
unsigned_
integral Pixel>
210void FBPostProcessor<Pixel>::drawNoise(OutputSurface& output_)
212 if (renderSettings.getNoise() == 0.0f)
return;
214 auto& output = checked_cast<SDLOutputSurface&>(output_);
// w, h: logical size of the output surface.
215 auto [w, h] = output.getLogicalSize();
216 auto pixelAccess = output.getDirectPixelAccess();
217 for (
auto y :
xrange(h)) {
// Each row starts reading noiseBuf at its own offset noiseShift[y].
// 'buf' is defined on a line not shown in this listing; presumably it is
// the span of pixels for row y obtained through pixelAccess -- confirm.
219 drawNoiseLine(buf, &noiseBuf[noiseShift[y]]);
// Setting-change callback. When the changed setting is the noise setting
// (compared by address), the noise table is recomputed with its new value.
// NOTE(review): handling of other settings, if any, is not visible in this
// listing.
223template<std::
unsigned_
integral Pixel>
224void FBPostProcessor<Pixel>::update(
const Setting&
setting)
noexcept
227 auto& noiseSetting = renderSettings.getNoiseSetting();
228 if (&
setting == &noiseSetting) {
229 preCalcNoise(noiseSetting.getFloat());
// NOTE(review): the start of this definition's signature is not visible in
// this listing; from the parameter list and the member initializers it is
// presumably the FBPostProcessor constructor -- confirm.
234template<std::
unsigned_
integral Pixel>
237 unsigned maxWidth_,
unsigned height_,
bool canDoInterlace_)
// Arguments forwarded to what is presumably the base-class constructor.
239 motherBoard_, display_, screen_, videoSource, maxWidth_, height_,
// One noise offset per logical screen row.
241 , noiseShift(screen.getLogicalHeight())
// Pixel helper configured for the screen's pixel format.
242 , pixelOps(screen.getPixelFormat())
// Register for change notifications of the noise setting and compute the
// initial noise table from its current value.
245 noiseSetting.
attach(*
this);
246 preCalcNoise(noiseSetting.getFloat());
// NOTE(review): the signature line is not visible in this listing;
// presumably this is the FBPostProcessor destructor -- confirm.
// Unregisters this object from the noise setting's change notifications.
250template<std::
unsigned_
integral Pixel>
253 renderSettings.getNoiseSetting().detach(*
this);
// NOTE(review): the signature line is not visible in this listing; from the
// use of 'output_' this is presumably FBPostProcessor::paint(OutputSurface&).
// Scales the current frame ('paintFrame') onto the output surface and
// flushes the surface's frame buffer. Braces, returns and several statements
// of the original are missing from this listing.
256template<std::
unsigned_
integral Pixel>
259 auto& output = checked_cast<SDLOutputSurface&>(output_);
// Optional black-frame interleaving: the counter toggles between 0 and 1 on
// every call; when it is non-zero the screen is cleared.
// NOTE(review): presumably followed by an early return -- not visible here.
260 if (renderSettings.getInterleaveBlackFrame()) {
261 interleaveCount ^= 1;
262 if (interleaveCount) {
263 output.clearScreen();
// Nothing to draw without a frame.
268 if (!paintFrame)
return;
// Detect changes in scaler algorithm, scale factor, horizontal stretch or
// output surface since the previous call and remember the new values.
271 auto algo = renderSettings.getScaleAlgorithm();
272 unsigned factor = renderSettings.getScaleFactor();
273 unsigned inWidth = narrow<unsigned>(lrintf(renderSettings.getHorizontalStretch()));
274 if ((scaleAlgorithm != algo) || (scaleFactor != factor) ||
275 (inWidth != stretchWidth) || (lastOutput != &output)) {
276 scaleAlgorithm = algo;
277 scaleFactor = factor;
278 stretchWidth = inWidth;
279 lastOutput = &output;
// Tail of a call whose beginning is not visible; presumably (re)creates the
// scaler output object for the new configuration -- confirm.
284 output, pixelOps, inWidth);
// Reduce the source:destination height ratio to lowest terms; srcStep source
// lines correspond to dstStep destination lines.
288 const unsigned srcHeight = paintFrame->getHeight();
289 const unsigned dstHeight = output.getLogicalHeight();
291 unsigned g = std::gcd(srcHeight, dstHeight);
292 unsigned srcStep = srcHeight /
g;
293 unsigned dstStep = dstHeight /
g;
297 unsigned srcStartY = 0;
298 unsigned dstStartY = 0;
299 stretchScaler->frameStart();
// Walk over the destination in chunks.
300 while (dstStartY < dstHeight) {
303 assert(srcStartY < srcHeight);
// Start with one step worth of lines, then (inner loop) keep extending the
// chunk while the following source lines have the same line width.
// NOTE(review): the inner loop body is not visible in this listing.
306 unsigned lineWidth = getLineWidth(paintFrame, srcStartY, srcStep);
307 unsigned srcEndY = srcStartY + srcStep;
308 unsigned dstEndY = dstStartY + dstStep;
309 while ((srcEndY < srcHeight) && (dstEndY < dstHeight) &&
310 (getLineWidth(paintFrame, srcEndY, srcStep) == lineWidth)) {
// Scale source lines [srcStartY, srcEndY) of uniform width onto destination
// lines [dstStartY, dstEndY).
318 currScaler->scaleImage(
319 *paintFrame, superImposeVideoFrame,
320 srcStartY, srcEndY, lineWidth,
321 *stretchScaler, dstStartY, dstEndY);
327 stretchScaler->frameStop();
331 output.flushFrameBuffer();
// NOTE(review): the line carrying the return type and function name is not
// visible in this listing; from the parameter list this is presumably
// FBPostProcessor::rotateFrames -- confirm.
// Visible part: picks a fresh random noise offset for every logical screen
// row. 'generator' is defined on a line not shown here.
334template<std::
unsigned_
integral Pixel>
336 std::unique_ptr<RawFrame> finishedFrame, EmuTime::param time)
// Uniform integer in [0, NOISE_SHIFT / 16 - 1]; multiplied by 16 below, so
// every offset is a multiple of 16 in the range [0, NOISE_SHIFT - 16].
339 std::uniform_int_distribution<int> distribution(0, NOISE_SHIFT / 16 - 1);
340 for (
auto y :
xrange(screen.getLogicalHeight())) {
341 noiseShift[y] = narrow<uint16_t>(distribution(generator) * 16);
Represents the output window/screen of openMSX.
FBPostProcessor(MSXMotherBoard &motherBoard, Display &display, OutputSurface &screen, const std::string &videoSource, unsigned maxWidth, unsigned height, bool canDoInterlace)
~FBPostProcessor() override
void paint(OutputSurface &output) override
Paint this layer.
std::unique_ptr< RawFrame > rotateFrames(std::unique_ptr< RawFrame > finishedFrame, EmuTime::param time) override
Sets up the "abcdFrame" variables for a new frame.
A frame buffer where pixels can be written to.
int getLogicalWidth() const
Abstract base class for post processors.
virtual std::unique_ptr< RawFrame > rotateFrames(std::unique_ptr< RawFrame > finishedFrame, EmuTime::param time)
Sets up the "abcdFrame" variables for a new frame.
RenderSettings & renderSettings
Render settings.
OutputSurface & screen
The surface which is visible to the user.
FloatSetting & getNoiseSetting()
The amount of noise to add to the frame.
static std::unique_ptr< Scaler< Pixel > > createScaler(const PixelOperations< Pixel > &pixelOps, RenderSettings &renderSettings)
Instantiates a Scaler.
void attach(Observer< T > &observer)
void update(const Setting &setting) noexcept override
mat3 n3(vec3(1, 0, 3), vec3(4, 5, 6), vec3(7, 8, 9))
mat3 o3(vec3(1, 2, 3), vec3(4, 5, 0), vec3(7, 8, 9))
imat3 i3(ivec3(1, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
constexpr vecN< N, T > clamp(const vecN< N, T > &x, const vecN< N, T > &minVal, const vecN< N, T > &maxVal)
constexpr mat4 scale(const vec3 &xyz)
This file implemented 3 utility functions:
constexpr void fill(ForwardRange &&range, const T &value)
auto & global_urng()
Return reference to a (shared) global random number generator.
constexpr auto subspan(Range &&range, size_t offset, size_t count=std::dynamic_extent)
static std::unique_ptr< ScalerOutput< Pixel > > create(SDLOutputSurface &output, PixelOperations< Pixel > pixelOps, unsigned inWidth)
constexpr auto xrange(T e)