63 unsigned line, std::span<Pixel> helpBuf)
const
65 unsigned width0 = lastFrames[0]->getLineWidthDirect(line);
66 unsigned width1 = lastFrames[1]->getLineWidthDirect(line);
67 unsigned width2 = lastFrames[2]->getLineWidthDirect(line);
68 unsigned width3 = lastFrames[3]->getLineWidthDirect(line);
69 const Pixel* line0 = lastFrames[0]->getLineDirect(line).data();
70 const Pixel* line1 = lastFrames[1]->getLineDirect(line).data();
71 const Pixel* line2 = lastFrames[2]->getLineDirect(line).data();
72 const Pixel* line3 = lastFrames[3]->getLineDirect(line).data();
73 if ((width0 != width3) || (width0 != width2) || (width0 != width1)) {
75 return std::span{line0, width0};
81 auto* buf = helpBuf.data();
82 Pixel* out = (width0 <= helpBuf.size()) ? buf : buf2.data();
90 size_t remaining = width0;
92 size_t pixelsPerSSE =
sizeof(__m128i) /
sizeof(
Pixel);
93 size_t widthSSE = remaining & ~(pixelsPerSSE - 1);
99 auto byteOffst = -ptrdiff_t(widthSSE *
sizeof(
Pixel));
101 while (byteOffst < 0) {
102 __m128i a0 = uload(line0, byteOffst);
103 __m128i a1 = uload(line1, byteOffst);
104 __m128i a2 = uload(line2, byteOffst);
105 __m128i a3 = uload(line3, byteOffst);
107 __m128i e02 = compare(a0, a2);
108 __m128i e13 = compare(a1, a3);
109 __m128i cnd = _mm_and_si128(e02, e13);
111 __m128i a01 = blend(a0, a1);
112 __m128i p = _mm_xor_si128(a0, a01);
113 __m128i q = _mm_and_si128(p, cnd);
114 __m128i r = _mm_xor_si128(q, a0);
116 ustore(dst, byteOffst, r);
117 byteOffst +=
sizeof(__m128i);
119 remaining &= pixelsPerSSE - 1;
122 for (
auto x :
xrange(remaining)) {
123 dst[x] = ((line0[x] == line2[x]) && (line1[x] == line3[x]))
124 ? pixelOps.template blend<1, 1>(line0[x], line1[x])
128 if (width0 <= helpBuf.size()) {
130 return std::span{buf, width0};
133 scaleLine(std::span{out, width0}, helpBuf);