// Returns 16 bytes taken from the 32-byte concatenation 'high:low', starting
// BYTES bytes into 'low'. In other words: 'low' shifted right by BYTES bytes,
// with the vacated upper bytes filled from the lower bytes of 'high'.
//
// BYTES  number of bytes to shift out of 'low' (compile-time constant).
// TMP    derived helper (16 - BYTES); callers should not pass it explicitly.
template<int BYTES, int TMP = sizeof(__m128i) - BYTES>
[[nodiscard]] static inline __m128i align(__m128i high, __m128i low)
{
#ifdef __SSSE3__
	// SSSE3 offers this operation as a single instruction.
	return _mm_alignr_epi8(high, low, BYTES);
#else
	// SSE2-only equivalent: the two shifted halves occupy disjoint bytes,
	// so OR-ing them gives the same result as the instruction above.
	return _mm_or_si128(
		_mm_slli_si128(high, TMP),
		_mm_srli_si128(low, BYTES));
#endif
}
// Bitwise select: every result bit is taken from 'a1' where the corresponding
// bit of 'mask' is 1 and from 'a0' where it is 0.
[[nodiscard]] static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
	// ((a0 ^ a1) & mask) ^ a0:
	//   mask bit 1 -> a0 ^ a1 ^ a0 == a1
	//   mask bit 0 -> 0 ^ a0       == a0
	return _mm_xor_si128(_mm_and_si128(_mm_xor_si128(a0, a1), mask), a0);
}
// Lane-wise equality compare of two SSE registers. The lane width is chosen at
// compile time from sizeof(Pixel): 32-bit lanes for 4-byte pixels, 16-bit
// lanes for 2-byte pixels.
// NOTE(review): the handling of any other pixel size and the closing lines of
// this function are not visible in this excerpt.
66template<std::
unsigned_
integral Pixel> [[nodiscard]]
static inline __m128i isEqual(__m128i x, __m128i y)
68 if constexpr (
sizeof(
Pixel) == 4) {
69 return _mm_cmpeq_epi32(x, y);
70 }
else if constexpr (
sizeof(
Pixel) == 2) {
71 return _mm_cmpeq_epi16(x, y);
// Interleaves the pixels from the lower halves of 'x' and 'y'. The lane width
// follows sizeof(Pixel): 32-bit unpack for 4-byte pixels, 16-bit unpack for
// 2-byte pixels.
// NOTE(review): the handling of any other pixel size and the closing lines of
// this function are not visible in this excerpt.
76template<std::
unsigned_
integral Pixel> [[nodiscard]]
static inline __m128i unpacklo(__m128i x, __m128i y)
78 if constexpr (
sizeof(
Pixel) == 4) {
79 return _mm_unpacklo_epi32(x, y);
80 }
else if constexpr (
sizeof(
Pixel) == 2) {
81 return _mm_unpacklo_epi16(x, y);
// Interleaves the pixels from the upper halves of 'x' and 'y'. The lane width
// follows sizeof(Pixel): 32-bit unpack for 4-byte pixels, 16-bit unpack for
// 2-byte pixels.
// NOTE(review): the handling of any other pixel size and the closing lines of
// this function are not visible in this excerpt.
86template<std::
unsigned_
integral Pixel> [[nodiscard]]
static inline __m128i unpackhi(__m128i x, __m128i y)
88 if constexpr (
sizeof(
Pixel) == 4) {
89 return _mm_unpackhi_epi32(x, y);
90 }
else if constexpr (
sizeof(
Pixel) == 2) {
91 return _mm_unpackhi_epi16(x, y);
// Processes one 16-byte vector of pixels for the Scale2x scaler.
//
// top, bottom      the vectors directly above and below 'mid'.
// prev, mid, next  three consecutive vectors of the middle line; 'prev' and
//                  'next' only supply the pixel adjacent to 'mid' on each side.
// out0, out1       destinations for the upper and lower output line.
//
// With DOUBLE_X, two vectors are written to each of out0 and out1.
// NOTE(review): the branch taken when DOUBLE_X is false is not visible in this
// excerpt.
98template<std::
unsigned_
integral Pixel,
bool DOUBLE_X>
static inline void scale1(
99 __m128i top, __m128i bottom,
100 __m128i prev, __m128i mid, __m128i next,
101 __m128i* out0, __m128i* out1)
// Left/right neighbours of every pixel in 'mid': 'mid' moved by one pixel,
// with the pixel that falls off the edge replaced by the one from prev/next.
103 __m128i left = align<
sizeof(__m128i) -
sizeof(
Pixel)>(mid, prev);
104 __m128i right = align< sizeof(Pixel)>(next, mid);
// Per-pixel equality masks (all-ones lane where equal, all-zeros otherwise).
106 __m128i teqb = isEqual<Pixel>(top, bottom);
107 __m128i leqt = isEqual<Pixel>(left, top);
108 __m128i reqt = isEqual<Pixel>(right, top);
109 __m128i leqb = isEqual<Pixel>(left, bottom);
110 __m128i reqb = isEqual<Pixel>(right, bottom);
// cndA: left == top    && top != bottom && right != top
// cndB: right == top   && top != bottom && left != top
// cndC: left == bottom && top != bottom && right != bottom
// cndD: right == bottom && top != bottom && left != bottom
112 __m128i cndA = _mm_andnot_si128(_mm_or_si128(teqb, reqt), leqt);
113 __m128i cndB = _mm_andnot_si128(_mm_or_si128(teqb, leqt), reqt);
114 __m128i cndC = _mm_andnot_si128(_mm_or_si128(teqb, reqb), leqb);
115 __m128i cndD = _mm_andnot_si128(_mm_or_si128(teqb, leqb), reqb);
// Where the condition holds take the top/bottom pixel, otherwise keep 'mid'.
117 __m128i a = select(mid, top, cndA);
118 __m128i b = select(mid, top, cndB);
119 __m128i c = select(mid, bottom, cndC);
120 __m128i d = select(mid, bottom, cndD);
// Horizontal doubling: interleave a/b for the upper line and c/d for the lower.
122 if constexpr (DOUBLE_X) {
123 out0[0] = unpacklo<Pixel>(a, b);
124 out0[1] = unpackhi<Pixel>(a, b);
125 out1[0] = unpacklo<Pixel>(c, d);
126 out1[1] = unpackhi<Pixel>(c, d);
// SSE driver: feeds three input lines through scale1(), one 16-byte vector at
// a time, writing two output lines.
//
// out0_, out1_       output lines; must be 16-byte aligned.
// in0_, in1_, in2_   input lines; must be 16-byte aligned. in1_ is the line
//                    from which prev/mid/next are taken, in0_ and in2_ supply
//                    'top' and 'bottom'.
// SHIFT              byte shift of (16 - sizeof(Pixel)), used at the line ends.
//
// NOTE(review): the 'width' parameter, the declaration/update of 'prev' and
// the loop structure around the middle section are not visible in this
// excerpt; comments below describe only the statements that are shown.
135template<
bool DOUBLE_X, std::unsigned_integral
Pixel,
136 int SHIFT =
sizeof(__m128i) -
sizeof(
Pixel)>
137static inline void scaleSSE(
138 Pixel* __restrict out0_,
139 Pixel* __restrict out1_,
140 const Pixel* __restrict in0_,
141 const Pixel* __restrict in1_,
142 const Pixel* __restrict in2_,
// All five buffers must start on a 16-byte boundary.
146 assert((
reinterpret_cast<uintptr_t
>(in0_ ) %
sizeof(__m128i)) == 0);
147 assert((
reinterpret_cast<uintptr_t
>(in1_ ) %
sizeof(__m128i)) == 0);
148 assert((
reinterpret_cast<uintptr_t
>(in2_ ) %
sizeof(__m128i)) == 0);
149 assert((
reinterpret_cast<uintptr_t
>(out0_) %
sizeof(__m128i)) == 0);
150 assert((
reinterpret_cast<uintptr_t
>(out1_) %
sizeof(__m128i)) == 0);
// From here on 'width' is measured in bytes and must be a multiple of 16.
153 width *=
sizeof(
Pixel);
154 assert((width %
sizeof(__m128i)) == 0);
// Exclude the final vector; it is handled separately at the end.
156 width -=
sizeof(__m128i);
// Output bytes produced per input byte.
158 constexpr size_t SCALE = DOUBLE_X ? 2 : 1;
// Point every line at its final vector and index backwards from there with a
// negative byte offset 'x'.
163 const auto* in0 =
reinterpret_cast<const char*
>(in0_ ) + width;
164 const auto* in1 =
reinterpret_cast<const char*
>(in1_ ) + width;
165 const auto* in2 =
reinterpret_cast<const char*
>(in2_ ) + width;
166 auto* out0 =
reinterpret_cast< char*
>(out0_) + SCALE * width;
167 auto* out1 =
reinterpret_cast< char*
>(out1_) + SCALE * width;
168 ptrdiff_t x = -ptrdiff_t(width);
// Prime the pipeline: 'next' is the first vector of the middle line and 'mid'
// is that vector shifted left by SHIFT bytes.
171 __m128i
next = *
reinterpret_cast<const __m128i*
>(in1 + x);
172 __m128i mid = _mm_slli_si128(next, SHIFT);
// Vectors before the last one: load top/bottom at offset x, fetch the
// following middle-line vector, scale, and advance by one vector.
176 __m128i top = *
reinterpret_cast<const __m128i*
>(in0 + x);
177 __m128i bottom = *
reinterpret_cast<const __m128i*
>(in2 + x);
180 next = *
reinterpret_cast<const __m128i*
>(in1 + x +
sizeof(__m128i));
181 scale1<Pixel, DOUBLE_X>(top, bottom, prev, mid, next,
182 reinterpret_cast<__m128i*
>(out0 + SCALE * x),
183 reinterpret_cast<__m128i*
>(out1 + SCALE * x));
184 x +=
sizeof(__m128i);
// Final vector: there is no following vector to load, so 'next' is derived by
// shifting the current one right by SHIFT bytes.
189 __m128i top = *
reinterpret_cast<const __m128i*
>(in0);
190 __m128i bottom = *
reinterpret_cast<const __m128i*
>(in2);
193 next = _mm_srli_si128(next, SHIFT);
194 scale1<Pixel, DOUBLE_X>(top, bottom, prev, mid, next,
195 reinterpret_cast<__m128i*
>(out0),
196 reinterpret_cast<__m128i*
>(out1));
202template<std::
unsigned_
integral Pixel>
// Produces two destination lines, each twice as wide as the source lines, from
// three source lines (src0, src1, src2).
// NOTE(review): the line carrying the function name is not visible here;
// presumably this is Scale2xScaler<Pixel>::scaleLine_1on2 -- confirm.
208template<std::
unsigned_
integral Pixel>
210 std::span<Pixel> dst0, std::span<Pixel> dst1,
211 std::span<const Pixel> src0, std::span<const Pixel> src1, std::span<const Pixel> src2)
// All source lines must have equal length; both outputs must be twice as long.
// NOTE(review): srcWidth is initialised from src0.size(), so the first assert
// below always holds.
213 auto srcWidth = src0.size();
214 assert(src0.size() == srcWidth);
215 assert(src1.size() == srcWidth);
216 assert(src2.size() == srcWidth);
217 assert(dst0.size() == 2 * srcWidth);
218 assert(dst1.size() == 2 * srcWidth);
// Two implementations are shown: the SSE routine with horizontal doubling, and
// two scalar half-line calls (the second with src0/src2 swapped for the lower
// line). NOTE(review): what selects between them is not visible -- presumably
// a preprocessor conditional.
226 scaleSSE<true>(dst0.data(), dst1.data(), src0.data(), src1.data(), src2.data(), srcWidth);
228 scaleLineHalf_1on2(dst0, src0, src1, src2);
229 scaleLineHalf_1on2(dst1, src2, src1, src0);
// Scalar routine that produces one double-width output line 'dst' from three
// source lines. The first pixel, the interior pixels and the last pixel are
// handled separately.
// NOTE(review): the declarations of 'mid', 'left', 'top' and 'bot', the write
// to dst[0] and the end of the final statement are not visible in this
// excerpt.
233template<std::
unsigned_
integral Pixel>
234void Scale2xScaler<Pixel>::scaleLineHalf_1on2(
235 std::span<Pixel> dst,
236 std::span<const Pixel> src0, std::span<const Pixel> src1, std::span<const Pixel> src2)
238 auto srcWidth = src0.size();
// First source pixel: its right neighbour is src1[1].
248 Pixel right = src1[1];
250 dst[1] = (right == src0[0] && src2[0] != src0[0]) ? src0[0] : mid;
// Interior pixels: each source pixel x yields dst[2x] and dst[2x + 1].
253 for (
auto x :
xrange(1u, srcWidth - 1)) {
// Take 'top' when it matches the neighbour on that side but differs from the
// opposite neighbour and from 'bot'; otherwise keep 'mid'.
259 dst[2 * x + 0] = (left == top && right != top && bot != top) ? top : mid;
260 dst[2 * x + 1] = (right == top && left != top && bot != top) ? top : mid;
// Last source pixel.
264 dst[2 * srcWidth - 2] =
265 (mid == src0[srcWidth - 1] && src2[srcWidth - 1] != src0[srcWidth - 1])
266 ? src0[srcWidth - 1] : right;
267 dst[2 * srcWidth - 1] =
// Produces two destination lines of the same width as the source lines from
// three source lines (src0, src1, src2).
271template<std::
unsigned_
integral Pixel>
272inline void Scale2xScaler<Pixel>::scaleLine_1on1(
273 std::span<Pixel> dst0, std::span<Pixel> dst1,
274 std::span<const Pixel> src0, std::span<const Pixel> src1, std::span<const Pixel> src2)
// All five lines must have the same length.
// NOTE(review): srcWidth is initialised from src0.size(), so the first assert
// below always holds.
276 auto srcWidth = src0.size();
277 assert(src0.size() == srcWidth);
278 assert(src1.size() == srcWidth);
279 assert(src2.size() == srcWidth);
280 assert(dst0.size() == srcWidth);
281 assert(dst1.size() == srcWidth);
// Two implementations are shown: the SSE routine without horizontal doubling,
// and two scalar half-line calls (the second with src0/src2 swapped for the
// lower line). NOTE(review): what selects between them is not visible --
// presumably a preprocessor conditional.
284 scaleSSE<false>(dst0.data(), dst1.data(), src0.data(), src1.data(), src2.data(), srcWidth);
286 scaleLineHalf_1on1(dst0, src0, src1, src2);
287 scaleLineHalf_1on1(dst1, src2, src1, src0);
// Scalar routine that produces one same-width output line 'dst' from three
// source lines.
// NOTE(review): the declarations of 'mid', 'left', 'top' and 'bot', the
// handling of the first pixel and the target of the final assignment are not
// visible in this excerpt.
291template<std::
unsigned_
integral Pixel>
292void Scale2xScaler<Pixel>::scaleLineHalf_1on1(
293 std::span<Pixel> dst,
294 std::span<const Pixel> src0, std::span<const Pixel> src1, std::span<const Pixel> src2)
296 auto srcWidth = src0.size();
// Right neighbour of the first source pixel.
303 Pixel right = src1[1];
// Interior pixels: one output pixel per source pixel.
307 for (
auto x :
xrange(1u, srcWidth - 1)) {
// Take 'top' when it equals 'left' but differs from 'right' and 'bot';
// otherwise keep 'mid'.
313 dst[x] = (left == top && right != top && bot != top) ? top : mid;
// Value computed for the last source pixel.
318 (mid == src0[srcWidth - 1] && src2[srcWidth - 1] != src0[srcWidth - 1])
319 ? src0[srcWidth - 1] : right;
// Frame-level loop that calls scaleLine_1on2 for every source line, so each
// source line yields two destination lines of double width.
// NOTE(review): the line carrying the function name is not visible here;
// presumably this is Scale2xScaler<Pixel>::scale1x1to2x2 -- confirm. The
// declarations of buf0/buf1/buf2, dstUpper/dstLower and the remaining
// parameters are not visible either.
322template<std::
unsigned_
integral Pixel>
324 unsigned srcStartY,
unsigned ,
unsigned srcWidth,
// Fetch the line above the first source line and the first source line itself.
331 auto srcY = narrow<int>(srcStartY);
332 auto srcPrev = src.
getLine(srcY - 1, buf0);
333 auto srcCurr = src.
getLine(srcY + 0, buf1);
// One source line per iteration, two destination lines per iteration.
335 for (
unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
336 auto srcNext = src.
getLine(srcY + 1, buf2);
339 scaleLine_1on2(dstUpper, dstLower,
340 srcPrev, srcCurr, srcNext);
// Frame-level loop that calls scaleLine_1on1 for every source line, so each
// source line yields two destination lines of the same width.
// NOTE(review): the line carrying the function name is not visible here;
// presumably this is Scale2xScaler<Pixel>::scale1x1to1x2 -- confirm. The
// declarations of buf0/buf1/buf2, dstUpper/dstLower and the remaining
// parameters are not visible either.
350template<std::
unsigned_
integral Pixel>
352 unsigned srcStartY,
unsigned ,
unsigned srcWidth,
// Fetch the line above the first source line and the first source line itself.
359 auto srcY = narrow<int>(srcStartY);
360 auto srcPrev = src.
getLine(srcY - 1, buf0);
361 auto srcCurr = src.
getLine(srcY + 0, buf1);
// One source line per iteration, two destination lines per iteration.
363 for (
unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
364 auto srcNext = src.
getLine(srcY + 1, buf2);
367 scaleLine_1on1(dstUpper, dstLower,
368 srcPrev, srcCurr, srcNext);
Interface for getting lines from a video frame.
std::span< const Pixel > getLine(int line, std::span< Pixel > buf) const
Gets a pointer to the pixels of the given line number.
Runs the Scale2x scaler algorithm.
void scale1x1to2x2(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY) override
void scale1x1to1x2(FrameSource &src, unsigned srcStartY, unsigned srcEndY, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY) override
Scale2xScaler(const PixelOperations< Pixel > &pixelOps)
Base class for 2x scalers.
virtual void releaseLine(unsigned y, std::span< Pixel > buf)=0
virtual std::span< Pixel > acquireLine(unsigned y)=0
This file implements 3 utility functions:
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
uint32_t next(octet_iterator &it, octet_iterator end)
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
constexpr auto xrange(T e)