24 if (f == factor)
return;
28 auto pix = uint32_t(p);
29 t = narrow_cast<uint16_t>(
30 ((((pix & pixelOps.getRmask()) * f) >> 8) & pixelOps.getRmask()) |
31 ((((pix & pixelOps.getGmask()) * f) >> 8) & pixelOps.getGmask()) |
32 ((((pix & pixelOps.getBmask()) * f) >> 8) & pixelOps.getBmask()));
38 auto r = (((p & pixelOps.getRmask()) * f) >> 8) & pixelOps.getRmask();
39 auto g = (((p & pixelOps.getGmask()) * f) >> 8) & pixelOps.getGmask();
40 auto b = (((p & pixelOps.getBmask()) * f) >> 8) & pixelOps.getBmask();
41 return narrow_cast<uint16_t>(r |
g | b);
68 return multiply(p, factor);
// Blend-and-darken one 16-byte group: out = ((in1 avg in2) * f) >> 16, per byte.
//
// in1, in2: source bytes; read through a __m128i pointer.
// out:      destination; written through a __m128i pointer.
// f:        per-16-bit-lane multiplier. The 32bpp drawSSE2() in this file
//           passes (factor << 8) in every lane, so the high half of the
//           16x16 product below works out to (byte * factor) >> 8.
// NOTE(review): the plain pointer dereferences presumably require 16-byte
// alignment; the 32bpp caller asserts this for the row base pointers.
75static inline void drawSSE2_1(
76 const char* __restrict in1,
const char* __restrict in2,
77 char* __restrict out, __m128i f)
79 __m128i zero = _mm_setzero_si128();
80 __m128i a = *
reinterpret_cast<const __m128i*
>(in1);
81 __m128i b = *
reinterpret_cast<const __m128i*
>(in2);
// Byte-wise unsigned average of the two inputs.
82 __m128i c = _mm_avg_epu8(a, b);
// Widen the 16 bytes to two vectors of eight 16-bit values (zero-extended).
83 __m128i l = _mm_unpacklo_epi8(c, zero);
84 __m128i h = _mm_unpackhi_epi8(c, zero);
// Keep only the upper 16 bits of each unsigned 16x16 product.
85 __m128i m = _mm_mulhi_epu16(l, f);
86 __m128i n = _mm_mulhi_epu16(h, f);
// Narrow back to 16 bytes (with unsigned saturation) and store.
87 __m128i r = _mm_packus_epi16(m, n);
88 *
reinterpret_cast<__m128i*
>(out) = r;
// 32bpp overload: blends two input rows and darkens the result, 16 bytes at
// a time via drawSSE2_1().
// NOTE(review): this listing is missing several original lines (some
// parameters, the braces and the loop construct around the four calls
// below); comments describe only what is visible.
90static inline void drawSSE2(
91 const uint32_t* __restrict in1_,
92 const uint32_t* __restrict in2_,
93 uint32_t* __restrict out_,
96 PixelOperations<uint32_t>& ,
// From here on 'width' is a byte count rather than a pixel count.
99 width *=
sizeof(uint32_t);
// All three row pointers must be aligned to sizeof(__m128i).
101 assert((
reinterpret_cast<uintptr_t
>(in1_) %
sizeof(__m128i)) == 0);
102 assert((
reinterpret_cast<uintptr_t
>(in2_) %
sizeof(__m128i)) == 0);
103 assert((
reinterpret_cast<uintptr_t
>(out_) %
sizeof(__m128i)) == 0);
// Point one-past-the-end of each row; the rows are then addressed with a
// negative byte offset 'x' that starts at -width.
104 const auto* in1 =
reinterpret_cast<const char*
>(in1_) + width;
105 const auto* in2 =
reinterpret_cast<const char*
>(in2_) + width;
106 auto* out =
reinterpret_cast< char*
>(out_) + width;
// Broadcast (factor << 8) to every 16-bit lane for _mm_mulhi_epu16.
108 __m128i f = _mm_set1_epi16(narrow_cast<int16_t>(factor << 8));
109 ptrdiff_t x = -ptrdiff_t(width);
// Four consecutive 16-byte groups (64 bytes) are handled per step.
// NOTE(review): the enclosing loop and the update of 'x' are not visible
// here; presumably x advances by 64 until it reaches 0 - confirm.
111 drawSSE2_1(in1 + x + 0, in2 + x + 0, out + x + 0, f);
112 drawSSE2_1(in1 + x + 16, in2 + x + 16, out + x + 16, f);
113 drawSSE2_1(in1 + x + 32, in2 + x + 32, out + x + 32, f);
114 drawSSE2_1(in1 + x + 48, in2 + x + 48, out + x + 48, f);
// 16bpp overload: combines two input rows with SSE2 and darkens each
// resulting pixel through the lookup table provided by 'darkener'.
// NOTE(review): this listing is missing several original lines (some
// parameters, the braces, the loop construct and part of the
// _mm_add_epi16 expression); comments describe only what is visible.
120static inline void drawSSE2(
121 const uint16_t* __restrict in1_,
122 const uint16_t* __restrict in2_,
123 uint16_t* __restrict out_,
126 PixelOperations<uint16_t>& pixelOps,
127 Multiply<uint16_t>& darkener)
// From here on 'width' is a byte count rather than a pixel count.
129 width *=
sizeof(uint16_t);
// Point one-past-the-end of each row; the rows are then addressed with a
// negative byte offset 'x' that starts at -width.
131 const auto* in1 =
reinterpret_cast<const char*
>(in1_) + width;
132 const auto* in2 =
reinterpret_cast<const char*
>(in2_) + width;
133 auto* out =
reinterpret_cast< char*
>(out_) + width;
// Select the darkening factor, then fetch the table that is indexed by
// pixel value further down.
135 darkener.setFactor(factor);
136 auto table = darkener.getTable();
// The blend mask, replicated into all eight 16-bit lanes.
137 __m128i mask = _mm_set1_epi16(narrow_cast<int16_t>(pixelOps.getBlendMask()));
139 ptrdiff_t x = -ptrdiff_t(width);
// Load eight 16-bit pixels from each input row.
141 __m128i a = *
reinterpret_cast<const __m128i*
>(in1 + x);
142 __m128i b = *
reinterpret_cast<const __m128i*
>(in2 + x);
// Combine a and b; one visible operand is ((a ^ b) & mask).
// NOTE(review): the remaining operand(s) of this expression are missing
// from the listing - check the original source for the full formula.
143 __m128i c = _mm_add_epi16(
146 _mm_and_si128(mask, _mm_xor_si128(a, b)),
// Darken each of the eight combined pixels via the table and reassemble
// the vector (_mm_set_epi16 takes the highest lane first) before storing.
148 *
reinterpret_cast<__m128i*
>(out + x) = _mm_set_epi16(
149 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 7)]),
150 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 6)]),
151 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 5)]),
152 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 4)]),
153 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 3)]),
154 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 2)]),
155 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 1)]),
156 narrow_cast<int16_t>(table[_mm_extract_epi16(c, 0)]));
182template<std::
unsigned_
integral Pixel>
184 : darkener(pixelOps_)
185 , pixelOps(pixelOps_)
// Writes to dst the darkened blend of src1 and src2.
// src1, src2: input pixel rows; dst: output row; all three spans are
//             asserted to have the same length.
// factor:     darkening factor handed to drawSSE2() / the darkener.
// NOTE(review): the function-name line and whatever selects between the
// SSE2 call and the scalar loop below (presumably a preprocessor
// conditional) are missing from this listing.
189template<std::
unsigned_
integral Pixel>
191 std::span<const Pixel> src1, std::span<const Pixel> src2,
192 std::span<Pixel> dst,
unsigned factor)
194 auto width = src1.size();
// NOTE(review): 'width' was just initialised from src1.size(), so this first
// assert compares the value with itself and can never fail; only the two
// asserts after it actually check anything.
195 assert(src1.size() == width);
196 assert(src2.size() == width);
197 assert(dst .
size() == width);
// SSE2 path: hand the raw pointers to the overload matching Pixel.
199 drawSSE2(src1.data(), src2.data(), dst.data(), factor, width, pixelOps, darkener);
// Scalar path: blend the two source pixels 1:1, then darken the result.
202 darkener.setFactor(factor);
203 for (
auto x :
xrange(width)) {
204 dst[x] = darkener.multiply(
205 pixelOps.template blend<1, 1>(src1[x], src2[x]));
210template<std::
unsigned_
integral Pixel>
213 return darkener.multiply(p, factor);
216template<std::
unsigned_
integral Pixel>
219 return darkener.multiply(pixelOps.template blend<1, 1>(p1, p2), factor);
Helper class to perform 'pixel x scalar' calculations.
static Pixel multiply(Pixel p, unsigned x)
Perform a component wise multiplication of a pixel with an 8-bit fractional value: result = (pixel * ...
Helper class to draw scanlines.
void draw(std::span< const Pixel > src1, std::span< const Pixel > src2, std::span< Pixel > dst, unsigned factor)
Draws a scanline.
Pixel darken(Pixel p, unsigned factor) const
Darken one pixel.
Scanline(const PixelOperations< Pixel > &pixelOps)
constexpr auto enumerate(Iterable &&iterable)
Heavily inspired by Nathan Reed's blog post: Python-Like enumerate() In C++17 http://reedbeta....
This file implements 3 utility functions:
size_t size(std::string_view utf8)
constexpr auto xrange(T e)