65 unsigned line, std::span<Pixel> helpBuf)
const
67 unsigned width0 = lastFrames[0]->getLineWidthDirect(line);
68 unsigned width1 = lastFrames[1]->getLineWidthDirect(line);
69 unsigned width2 = lastFrames[2]->getLineWidthDirect(line);
70 unsigned width3 = lastFrames[3]->getLineWidthDirect(line);
71 const Pixel* line0 = lastFrames[0]->getLineDirect(line).data();
72 const Pixel* line1 = lastFrames[1]->getLineDirect(line).data();
73 const Pixel* line2 = lastFrames[2]->getLineDirect(line).data();
74 const Pixel* line3 = lastFrames[3]->getLineDirect(line).data();
75 if ((width0 != width3) || (width0 != width2) || (width0 != width1)) {
77 return std::span{line0, width0};
83 auto* buf = helpBuf.data();
84 Pixel* out = (width0 <= helpBuf.size()) ? buf : buf2.data();
92 size_t remaining = width0;
94 size_t pixelsPerSSE =
sizeof(__m128i) /
sizeof(
Pixel);
95 size_t widthSSE = remaining & ~(pixelsPerSSE - 1);
101 auto byteOffst = -ptrdiff_t(widthSSE *
sizeof(
Pixel));
103 while (byteOffst < 0) {
104 __m128i a0 = uload(line0, byteOffst);
105 __m128i a1 = uload(line1, byteOffst);
106 __m128i a2 = uload(line2, byteOffst);
107 __m128i a3 = uload(line3, byteOffst);
109 __m128i e02 = compare(a0, a2);
110 __m128i e13 = compare(a1, a3);
111 __m128i cnd = _mm_and_si128(e02, e13);
113 __m128i a01 = blend(a0, a1);
114 __m128i p = _mm_xor_si128(a0, a01);
115 __m128i q = _mm_and_si128(p, cnd);
116 __m128i r = _mm_xor_si128(q, a0);
118 ustore(dst, byteOffst, r);
119 byteOffst +=
sizeof(__m128i);
121 remaining &= pixelsPerSSE - 1;
124 for (
auto x :
xrange(remaining)) {
125 dst[x] = ((line0[x] == line2[x]) && (line1[x] == line3[x]))
126 ? pixelOps.template blend<1, 1>(line0[x], line1[x])
130 if (width0 <= helpBuf.size()) {
132 return std::span{buf, width0};
135 scaleLine(std::span{out, width0}, helpBuf);