openMSX
LineScalers.hh
Go to the documentation of this file.
1#ifndef LINESCALERS_HH
2#define LINESCALERS_HH
3
4#include "PixelOperations.hh"
5
6#include "ranges.hh"
7#include "view.hh"
8#include "xrange.hh"
9
10#include <bit>
11#include <cassert>
12#include <cstddef>
13#include <cstdint>
14#include <span>
15#ifdef __SSE2__
16#include "emmintrin.h"
17#endif
18
19namespace openmsx {
20
// All routines in this file operate on 32-bit pixels.
using Pixel = std::uint32_t;
22
29void scale_1on3(std::span<const Pixel> in, std::span<Pixel> out);
30void scale_1on4(std::span<const Pixel> in, std::span<Pixel> out);
31void scale_1on6(std::span<const Pixel> in, std::span<Pixel> out);
32void Scale_1on2(std::span<const Pixel> in, std::span<Pixel> out);
33void scale_2on1(std::span<const Pixel> in, std::span<Pixel> out);
34void scale_6on1(std::span<const Pixel> in, std::span<Pixel> out);
35void scale_4on1(std::span<const Pixel> in, std::span<Pixel> out);
36void scale_3on1(std::span<const Pixel> in, std::span<Pixel> out);
37void scale_3on2(std::span<const Pixel> in, std::span<Pixel> out);
38void scale_3on4(std::span<const Pixel> in, std::span<Pixel> out);
39void scale_3on8(std::span<const Pixel> in, std::span<Pixel> out);
40void scale_2on3(std::span<const Pixel> in, std::span<Pixel> out);
41void scale_4on3(std::span<const Pixel> in, std::span<Pixel> out);
42void scale_8on3(std::span<const Pixel> in, std::span<Pixel> out);
43void scale_2on9(std::span<const Pixel> in, std::span<Pixel> out);
44void scale_4on9(std::span<const Pixel> in, std::span<Pixel> out);
45void scale_8on9(std::span<const Pixel> in, std::span<Pixel> out);
46void scale_4on5(std::span<const Pixel> in, std::span<Pixel> out);
47void scale_7on8(std::span<const Pixel> in, std::span<Pixel> out);
48void scale_9on10(std::span<const Pixel> in, std::span<Pixel> out);
49void scale_17on20(std::span<const Pixel> in, std::span<Pixel> out);
50
58template<unsigned w1 = 1, unsigned w2 = 1>
59void blendLines(std::span<const Pixel> in1, std::span<const Pixel> in2,
60 std::span<Pixel> out);
61
69void alphaBlendLines(std::span<const Pixel> in1, std::span<const Pixel> in2,
70 std::span<Pixel> out);
71void alphaBlendLines(Pixel in1, std::span<const Pixel> in2,
72 std::span<Pixel> out);
73
74
75// implementation
76
77template<unsigned N>
78static inline void scale_1onN(
79 std::span<const Pixel> in, std::span<Pixel> out)
80{
81 auto outWidth = out.size();
82 assert(in.size() == (outWidth / N));
83
84 size_t i = 0, j = 0;
85 for (/* */; i < (outWidth - (N - 1)); i += N, j += 1) {
86 Pixel pix = in[j];
87 for (auto k : xrange(N)) {
88 out[i + k] = pix;
89 }
90 }
91 for (auto k : xrange(N - 1)) {
92 if ((i + k) < outWidth) out[i + k] = 0;
93 }
94}
95
96inline void scale_1on3(std::span<const Pixel> in, std::span<Pixel> out)
97{
98 scale_1onN<3>(in, out);
99}
100
101inline void scale_1on4(std::span<const Pixel> in, std::span<Pixel> out)
102{
103 scale_1onN<4>(in, out);
104}
105
106inline void scale_1on6(std::span<const Pixel> in, std::span<Pixel> out)
107{
108 scale_1onN<6>(in, out);
109}
110
111#ifdef __SSE2__
// Interleave the two lowest 32-bit elements of 'x' and 'y' (pixels are 32bpp):
// the result holds x0, y0, x1, y1.
inline __m128i unpacklo(__m128i x, __m128i y)
{
	const __m128i interleaved = _mm_unpacklo_epi32(x, y);
	return interleaved;
}
// Interleave the two highest 32-bit elements of 'x' and 'y' (pixels are
// 32bpp): the result holds x2, y2, x3, y3.
inline __m128i unpackhi(__m128i x, __m128i y)
{
	const __m128i interleaved = _mm_unpackhi_epi32(x, y);
	return interleaved;
}
122
123inline void scale_1on2_SSE(const Pixel* __restrict in_, Pixel* __restrict out_, size_t srcWidth)
124{
125 size_t bytes = srcWidth * sizeof(Pixel);
126 assert((bytes % (4 * sizeof(__m128i))) == 0);
127 assert(bytes != 0);
128
129 const auto* in = std::bit_cast<const char*>(in_) + bytes;
130 auto* out = std::bit_cast< char*>(out_) + 2 * bytes;
131
132 auto x = -ptrdiff_t(bytes);
133 do {
134 __m128i a0 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + x + 0));
135 __m128i a1 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + x + 16));
136 __m128i a2 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + x + 32));
137 __m128i a3 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + x + 48));
138 __m128i l0 = unpacklo(a0, a0);
139 __m128i h0 = unpackhi(a0, a0);
140 __m128i l1 = unpacklo(a1, a1);
141 __m128i h1 = unpackhi(a1, a1);
142 __m128i l2 = unpacklo(a2, a2);
143 __m128i h2 = unpackhi(a2, a2);
144 __m128i l3 = unpacklo(a3, a3);
145 __m128i h3 = unpackhi(a3, a3);
146 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 0), l0);
147 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 16), h0);
148 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 32), l1);
149 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 48), h1);
150 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 64), l2);
151 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 80), h2);
152 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 96), l3);
153 _mm_storeu_si128(std::bit_cast<__m128i*>(out + 2*x + 112), h3);
154 x += 4 * sizeof(__m128i);
155 } while (x < 0);
156}
157#endif
158
159inline void scale_1on2(std::span<const Pixel> in, std::span<Pixel> out)
160{
161 // This is a fairly simple algorithm (output each input pixel twice).
162 // An ideal compiler should generate optimal (vector) code for it.
163 // I checked the 2013-05-29 dev snapshots of gcc-4.9 and clang-3.4:
164 // - Clang is not able to vectorize this loop. My best tuned C version
165 // of this routine is a little over 4x slower than the tuned
166 // SSE-intrinsics version.
167 // - Gcc can auto-vectorize this routine. Though my best tuned version
168 // (I mean tuned to further improve the auto-vectorization, including
169 // using the new __builtin_assume_aligned() intrinsic) still runs
170 // approx 40% slower than the intrinsics version.
171 // Hopefully in some years the compilers have improved further so that
172 // the intrinsic version is no longer needed.
173 auto srcWidth = in.size();
174 assert(out.size() == 2 * srcWidth);
175
176#ifdef __SSE2__
177 size_t chunk = 4 * sizeof(__m128i) / sizeof(Pixel);
178 size_t srcWidth2 = srcWidth & ~(chunk - 1);
179 scale_1on2_SSE(in.data(), out.data(), srcWidth2);
180 in = in .subspan( srcWidth2);
181 out = out.subspan(2 * srcWidth2);
182 srcWidth -= srcWidth2;
183#endif
184
185 // C++ version. Used both on non-x86 machines and (possibly) on x86 for
186 // the last few pixels of the line.
187 for (auto x : xrange(srcWidth)) {
188 out[x * 2] = out[x * 2 + 1] = in[x];
189 }
190}
191
192#ifdef __SSE2__
// Apply the single-precision float shuffle (selector IMM8) to integer
// vectors: the two low result elements are picked from 'x', the two high
// result elements from 'y'. The casts only reinterpret the bits.
template<int IMM8> static inline __m128i shuffle(__m128i x, __m128i y)
{
	const __m128 xf = _mm_castsi128_ps(x);
	const __m128 yf = _mm_castsi128_ps(y);
	const __m128 mixed = _mm_shuffle_ps(xf, yf, IMM8);
	return _mm_castps_si128(mixed);
}
198
199inline __m128i blend(__m128i x, __m128i y)
200{
201 // 32bpp
202 __m128i p = shuffle<0x88>(x, y);
203 __m128i q = shuffle<0xDD>(x, y);
204 return _mm_avg_epu8(p, q);
205}
206
207inline void scale_2on1_SSE(
208 const Pixel* __restrict in_, Pixel* __restrict out_, size_t dstBytes)
209{
210 assert((dstBytes % (4 * sizeof(__m128i))) == 0);
211 assert(dstBytes != 0);
212
213 const auto* in = std::bit_cast<const char*>(in_) + 2 * dstBytes;
214 auto* out = std::bit_cast< char*>(out_) + dstBytes;
215
216 auto x = -ptrdiff_t(dstBytes);
217 do {
218 __m128i a0 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 0));
219 __m128i a1 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 16));
220 __m128i a2 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 32));
221 __m128i a3 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 48));
222 __m128i a4 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 64));
223 __m128i a5 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 80));
224 __m128i a6 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 96));
225 __m128i a7 = _mm_loadu_si128(std::bit_cast<const __m128i*>(in + 2*x + 112));
226 __m128i b0 = blend(a0, a1);
227 __m128i b1 = blend(a2, a3);
228 __m128i b2 = blend(a4, a5);
229 __m128i b3 = blend(a6, a7);
230 _mm_storeu_si128(std::bit_cast<__m128i*>(out + x + 0), b0);
231 _mm_storeu_si128(std::bit_cast<__m128i*>(out + x + 16), b1);
232 _mm_storeu_si128(std::bit_cast<__m128i*>(out + x + 32), b2);
233 _mm_storeu_si128(std::bit_cast<__m128i*>(out + x + 48), b3);
234 x += 4 * sizeof(__m128i);
235 } while (x < 0);
236}
237#endif
238
239inline void scale_2on1(std::span<const Pixel> in, std::span<Pixel> out)
240{
241 assert(in.size() == 2 * out.size());
242 auto outWidth = out.size();
243#ifdef __SSE2__
244 auto n64 = (outWidth * sizeof(Pixel)) & ~63;
245 scale_2on1_SSE(in.data(), out.data(), n64); // process 64 byte chunks
246 outWidth &= ((64 / sizeof(Pixel)) - 1); // remaining pixels (if any)
247 if (outWidth == 0) [[likely]] return;
248 in = in .subspan(2 * n64 / sizeof(Pixel));
249 out = out.subspan( n64 / sizeof(Pixel));
250 // fallthrough to c++ version
251#endif
252 // pure C++ version
253 PixelOperations pixelOps;
254 for (auto i : xrange(outWidth)) {
255 out[i] = pixelOps.template blend<1, 1>(
256 in[2 * i + 0], in[2 * i + 1]);
257 }
258}
259
260inline void scale_6on1(std::span<const Pixel> in, std::span<Pixel> out)
261{
262 assert(in.size() == 6 * out.size());
263 PixelOperations pixelOps;
264 for (auto i : xrange(out.size())) {
265 out[i] = pixelOps.template blend<1, 1, 1, 1, 1, 1>(subspan<6>(in, 6 * i));
266 }
267}
268
269inline void scale_4on1(std::span<const Pixel> in, std::span<Pixel> out)
270{
271 assert(in.size() == 4 * out.size());
272 PixelOperations pixelOps;
273 for (auto i : xrange(out.size())) {
274 out[i] = pixelOps.template blend<1, 1, 1, 1>(subspan<4>(in, 4 * i));
275 }
276}
277
278inline void scale_3on1(std::span<const Pixel> in, std::span<Pixel> out)
279{
280 assert(in.size() == 3 * out.size());
281 PixelOperations pixelOps;
282 for (auto i : xrange(out.size())) {
283 out[i] = pixelOps.template blend<1, 1, 1>(subspan<3>(in, 3 * i));
284 }
285}
286
287inline void scale_3on2(std::span<const Pixel> in, std::span<Pixel> out)
288{
289 assert((in.size() / 3) == (out.size() / 2));
290 PixelOperations pixelOps;
291 size_t n = out.size();
292 size_t i = 0, j = 0;
293 for (/* */; i < (n - 1); i += 2, j += 3) {
294 out[i + 0] = pixelOps.template blend<2, 1>(subspan<2>(in, j + 0));
295 out[i + 1] = pixelOps.template blend<1, 2>(subspan<2>(in, j + 1));
296 }
297 if (i < n) out[i] = 0;
298}
299
300inline void scale_3on4(std::span<const Pixel> in, std::span<Pixel> out)
301{
302 assert((in.size() / 3) == (out.size() / 4));
303 PixelOperations pixelOps;
304 size_t n = out.size();
305 size_t i = 0, j = 0;
306 for (/* */; i < (n - 3); i += 4, j += 3) {
307 out[i + 0] = in[j + 0];
308 out[i + 1] = pixelOps.template blend<1, 2>(subspan<2>(in, j + 0));
309 out[i + 2] = pixelOps.template blend<2, 1>(subspan<2>(in, j + 1));
310 out[i + 3] = in[j + 2];
311 }
312 for (auto k : xrange(4 - 1)) {
313 if ((i + k) < n) out[i + k] = 0;
314 }
315}
316
317inline void scale_3on8(std::span<const Pixel> in, std::span<Pixel> out)
318{
319 assert((in.size() / 3) == (out.size() / 8));
320 PixelOperations pixelOps;
321 size_t n = out.size();
322 size_t i = 0, j = 0;
323 for (/* */; i < (n - 7); i += 8, j += 3) {
324 out[i + 0] = in[j + 0];
325 out[i + 1] = in[j + 0];
326 out[i + 2] = pixelOps.template blend<2, 1>(subspan<2>(in, j + 0));
327 out[i + 3] = in[j + 1];
328 out[i + 4] = in[j + 1];
329 out[i + 5] = pixelOps.template blend<1, 2>(subspan<2>(in, j + 1));
330 out[i + 6] = in[j + 2];
331 out[i + 7] = in[j + 2];
332 }
333 for (auto k : xrange(8 - 1)) {
334 if ((i + k) < n) out[i + k] = 0;
335 }
336}
337
338inline void scale_2on3(std::span<const Pixel> in, std::span<Pixel> out)
339{
340 assert((in.size() / 2) == (out.size() / 3));
341 PixelOperations pixelOps;
342 size_t n = out.size();
343 size_t i = 0, j = 0;
344 for (/* */; i < (n - 2); i += 3, j += 2) {
345 out[i + 0] = in[j + 0];
346 out[i + 1] = pixelOps.template blend<1, 1>(subspan<2>(in, j));
347 out[i + 2] = in[j + 1];
348 }
349 if ((i + 0) < n) out[i + 0] = 0;
350 if ((i + 1) < n) out[i + 1] = 0;
351}
352
353inline void scale_4on3(std::span<const Pixel> in, std::span<Pixel> out)
354{
355 assert((in.size() / 4) == (out.size() / 3));
356 PixelOperations pixelOps;
357 size_t n = out.size();
358 size_t i = 0, j = 0;
359 for (/* */; i < (n - 2); i += 3, j += 4) {
360 out[i + 0] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 0));
361 out[i + 1] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 1));
362 out[i + 2] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 2));
363 }
364 if ((i + 0) < n) out[i + 0] = 0;
365 if ((i + 1) < n) out[i + 1] = 0;
366}
367
368inline void scale_8on3(std::span<const Pixel> in, std::span<Pixel> out)
369{
370 assert((in.size() / 8) == (out.size() / 3));
371 PixelOperations pixelOps;
372 size_t n = out.size();
373 size_t i = 0, j = 0;
374 for (/* */; i < (n - 2); i += 3, j += 8) {
375 out[i + 0] = pixelOps.template blend<3, 3, 2> (subspan<3>(in, j + 0));
376 out[i + 1] = pixelOps.template blend<1, 3, 3, 1>(subspan<4>(in, j + 2));
377 out[i + 2] = pixelOps.template blend<2, 3, 3> (subspan<3>(in, j + 5));
378 }
379 if ((i + 0) < n) out[i + 0] = 0;
380 if ((i + 1) < n) out[i + 1] = 0;
381}
382
383inline void scale_2on9(std::span<const Pixel> in, std::span<Pixel> out)
384{
385 assert((in.size() / 2) == (out.size() / 9));
386 PixelOperations pixelOps;
387 size_t n = out.size();
388 size_t i = 0, j = 0;
389 for (/* */; i < (n - 8); i += 9, j += 2) {
390 out[i + 0] = in[j + 0];
391 out[i + 1] = in[j + 0];
392 out[i + 2] = in[j + 0];
393 out[i + 3] = in[j + 0];
394 out[i + 4] = pixelOps.template blend<1, 1>(subspan<2>(in, j));
395 out[i + 5] = in[j + 1];
396 out[i + 6] = in[j + 1];
397 out[i + 7] = in[j + 1];
398 out[i + 8] = in[j + 1];
399 }
400 if ((i + 0) < n) out[i + 0] = 0;
401 if ((i + 1) < n) out[i + 1] = 0;
402 if ((i + 2) < n) out[i + 2] = 0;
403 if ((i + 3) < n) out[i + 3] = 0;
404 if ((i + 4) < n) out[i + 4] = 0;
405 if ((i + 5) < n) out[i + 5] = 0;
406 if ((i + 6) < n) out[i + 6] = 0;
407 if ((i + 7) < n) out[i + 7] = 0;
408}
409
410inline void scale_4on9(std::span<const Pixel> in, std::span<Pixel> out)
411{
412 assert((in.size() / 4) == (out.size() / 9));
413 PixelOperations pixelOps;
414 size_t n = out.size();
415 size_t i = 0, j = 0;
416 for (/* */; i < (n - 8); i += 9, j += 4) {
417 out[i + 0] = in[j + 0];
418 out[i + 1] = in[j + 0];
419 out[i + 2] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 0));
420 out[i + 3] = in[j + 1];
421 out[i + 4] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 1));
422 out[i + 5] = in[j + 2];
423 out[i + 6] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 2));
424 out[i + 7] = in[j + 3];
425 out[i + 8] = in[j + 3];
426 }
427 if ((i + 0) < n) out[i + 0] = 0;
428 if ((i + 1) < n) out[i + 1] = 0;
429 if ((i + 2) < n) out[i + 2] = 0;
430 if ((i + 3) < n) out[i + 3] = 0;
431 if ((i + 4) < n) out[i + 4] = 0;
432 if ((i + 5) < n) out[i + 5] = 0;
433 if ((i + 6) < n) out[i + 6] = 0;
434 if ((i + 7) < n) out[i + 7] = 0;
435}
436
437inline void scale_8on9(std::span<const Pixel> in, std::span<Pixel> out)
438{
439 assert((in.size() / 8) == (out.size() / 9));
440 PixelOperations pixelOps;
441 size_t n = out.size();
442 size_t i = 0, j = 0;
443 for (/* */; i < (n - 8); i += 9, j += 8) {
444 out[i + 0] = in[j + 0];
445 out[i + 1] = pixelOps.template blend<1, 7>(subspan<2>(in, j + 0));
446 out[i + 2] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 1));
447 out[i + 3] = pixelOps.template blend<3, 5>(subspan<2>(in, j + 2));
448 out[i + 4] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 3));
449 out[i + 5] = pixelOps.template blend<5, 3>(subspan<2>(in, j + 4));
450 out[i + 6] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 5));
451 out[i + 7] = pixelOps.template blend<7, 1>(subspan<2>(in, j + 6));
452 out[i + 8] = in[j + 7];
453 }
454 if ((i + 0) < n) out[i + 0] = 0;
455 if ((i + 1) < n) out[i + 1] = 0;
456 if ((i + 2) < n) out[i + 2] = 0;
457 if ((i + 3) < n) out[i + 3] = 0;
458 if ((i + 4) < n) out[i + 4] = 0;
459 if ((i + 5) < n) out[i + 5] = 0;
460 if ((i + 6) < n) out[i + 6] = 0;
461 if ((i + 7) < n) out[i + 7] = 0;
462}
463
464inline void scale_4on5(std::span<const Pixel> in, std::span<Pixel> out)
465{
466 assert((in.size() / 4) == (out.size() / 5));
467 PixelOperations pixelOps;
468 size_t n = out.size();
469 assert((n % 5) == 0);
470 for (size_t i = 0, j = 0; i < n; i += 5, j += 4) {
471 out[i + 0] = in[j + 0];
472 out[i + 1] = pixelOps.template blend<1, 3>(subspan<2>(in, j + 0));
473 out[i + 2] = pixelOps.template blend<1, 1>(subspan<2>(in, j + 1));
474 out[i + 3] = pixelOps.template blend<3, 1>(subspan<2>(in, j + 2));
475 out[i + 4] = in[j + 3];
476 }
477}
478
479inline void scale_7on8(std::span<const Pixel> in, std::span<Pixel> out)
480{
481 assert((in.size() / 7) == (out.size() / 8));
482 PixelOperations pixelOps;
483 size_t n = out.size();
484 assert((n % 8) == 0);
485 for (size_t i = 0, j = 0; i < n; i += 8, j += 7) {
486 out[i + 0] = in[j + 0];
487 out[i + 1] = pixelOps.template blend<1, 6>(subspan<2>(in, j + 0));
488 out[i + 2] = pixelOps.template blend<2, 5>(subspan<2>(in, j + 1));
489 out[i + 3] = pixelOps.template blend<3, 4>(subspan<2>(in, j + 2));
490 out[i + 4] = pixelOps.template blend<4, 3>(subspan<2>(in, j + 3));
491 out[i + 5] = pixelOps.template blend<5, 2>(subspan<2>(in, j + 4));
492 out[i + 6] = pixelOps.template blend<6, 1>(subspan<2>(in, j + 5));
493 out[i + 7] = in[j + 6];
494 }
495}
496
497inline void scale_17on20(std::span<const Pixel> in, std::span<Pixel> out)
498{
499 assert((in.size() / 17) == (out.size() / 20));
500 PixelOperations pixelOps;
501 size_t n = out.size();
502 assert((n % 20) == 0);
503 for (size_t i = 0, j = 0; i < n; i += 20, j += 17) {
504 out[i + 0] = in[j + 0];
505 out[i + 1] = pixelOps.template blend< 3, 14>(subspan<2>(in, j + 0));
506 out[i + 2] = pixelOps.template blend< 6, 11>(subspan<2>(in, j + 1));
507 out[i + 3] = pixelOps.template blend< 9, 8>(subspan<2>(in, j + 2));
508 out[i + 4] = pixelOps.template blend<12, 5>(subspan<2>(in, j + 3));
509 out[i + 5] = pixelOps.template blend<15, 2>(subspan<2>(in, j + 4));
510 out[i + 6] = in[j + 5];
511 out[i + 7] = pixelOps.template blend< 1, 16>(subspan<2>(in, j + 5));
512 out[i + 8] = pixelOps.template blend< 4, 13>(subspan<2>(in, j + 6));
513 out[i + 9] = pixelOps.template blend< 7, 10>(subspan<2>(in, j + 7));
514 out[i + 10] = pixelOps.template blend<10, 7>(subspan<2>(in, j + 8));
515 out[i + 11] = pixelOps.template blend<13, 4>(subspan<2>(in, j + 9));
516 out[i + 12] = pixelOps.template blend<16, 1>(subspan<2>(in, j + 10));
517 out[i + 13] = in[j + 11];
518 out[i + 14] = pixelOps.template blend< 2, 15>(subspan<2>(in, j + 11));
519 out[i + 15] = pixelOps.template blend< 5, 12>(subspan<2>(in, j + 12));
520 out[i + 16] = pixelOps.template blend< 8, 9>(subspan<2>(in, j + 13));
521 out[i + 17] = pixelOps.template blend<11, 6>(subspan<2>(in, j + 14));
522 out[i + 18] = pixelOps.template blend<14, 3>(subspan<2>(in, j + 15));
523 out[i + 19] = in[j + 16];
524 }
525}
526
527inline void scale_9on10(std::span<const Pixel> in, std::span<Pixel> out)
528{
529 assert((in.size() / 9) == (out.size() / 10));
530 PixelOperations pixelOps;
531 size_t n = out.size();
532 assert((n % 10) == 0);
533 for (size_t i = 0, j = 0; i < n; i += 10, j += 9) {
534 out[i + 0] = in[j + 0];
535 out[i + 1] = pixelOps.template blend<1, 8>(subspan<2>(in, j + 0));
536 out[i + 2] = pixelOps.template blend<2, 7>(subspan<2>(in, j + 1));
537 out[i + 3] = pixelOps.template blend<3, 6>(subspan<2>(in, j + 2));
538 out[i + 4] = pixelOps.template blend<4, 5>(subspan<2>(in, j + 3));
539 out[i + 5] = pixelOps.template blend<5, 4>(subspan<2>(in, j + 4));
540 out[i + 6] = pixelOps.template blend<6, 3>(subspan<2>(in, j + 5));
541 out[i + 7] = pixelOps.template blend<7, 2>(subspan<2>(in, j + 6));
542 out[i + 8] = pixelOps.template blend<8, 1>(subspan<2>(in, j + 7));
543 out[i + 9] = in[j + 8];
544 }
545}
546
547template<unsigned w1, unsigned w2>
548void blendLines(std::span<const Pixel> in1, std::span<const Pixel> in2, std::span<Pixel> out)
549{
550 // It _IS_ allowed that the output is the same as one of the inputs.
551 // TODO SSE optimizations
552 // pure C++ version
553 assert(in1.size() == in2.size());
554 assert(in1.size() == out.size());
555 PixelOperations pixelOps;
556 for (auto [i1, i2, o] : view::zip_equal(in1, in2, out)) {
557 o = pixelOps.template blend<w1, w2>(i1, i2);
558 }
559}
560
561inline void alphaBlendLines(
562 std::span<const Pixel> in1, std::span<const Pixel> in2, std::span<Pixel> out)
563{
564 // It _IS_ allowed that the output is the same as one of the inputs.
565 assert(in1.size() == in2.size());
566 assert(in1.size() == out.size());
567 PixelOperations pixelOps;
568 for (auto [i1, i2, o] : view::zip_equal(in1, in2, out)) {
569 o = pixelOps.alphaBlend(i1, i2);
570 }
571}
572
573inline void alphaBlendLines(
574 Pixel in1, std::span<const Pixel> in2, std::span<Pixel> out)
575{
576 // It _IS_ allowed that the output is the same as the input.
577
578 // ATM this routine is only called when 'in1' is not fully opaque nor
579 // fully transparent.
580 assert(in2.size() == out.size());
581
582 PixelOperations pixelOps;
583 unsigned alpha = pixelOps.alpha(in1);
584
585 // When one of the two colors is loop-invariant, using the
586 // pre-multiplied-alpha-blending equation is a tiny bit more efficient
587 // than using alphaBlend() or even lerp().
588 // for (auto i : xrange(width)) {
589 // out[i] = pixelOps.lerp(in1, in2[i], alpha);
590 // }
591 Pixel in1M = pixelOps.multiply(in1, alpha);
592 unsigned alpha2 = 256 - alpha;
593 for (auto [i2, o] : view::zip_equal(in2, out)) {
594 o = in1M + pixelOps.multiply(i2, alpha2);
595 }
596}
597
598} // namespace openmsx
599
600#endif
unsigned alpha(Pixel p) const
static Pixel multiply(Pixel p, unsigned x)
Perform a component wise multiplication of a pixel with an 8-bit fractional value: result = (pixel * ...
Pixel alphaBlend(Pixel p1, Pixel p2) const
Perform alpha blending of two pixels.
imat3 l3(ivec3(0, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
This file implemented 3 utility functions:
Definition Autofire.cc:11
void scale_3on1(std::span< const Pixel > in, std::span< Pixel > out)
void blendLines(std::span< const Pixel > in1, std::span< const Pixel > in2, std::span< Pixel > out)
BlendLines functor Generate an output line that is an interpolation of two input lines.
void scale_2on1(std::span< const Pixel > in, std::span< Pixel > out)
void scale_7on8(std::span< const Pixel > in, std::span< Pixel > out)
void scale_2on3(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on9(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on5(std::span< const Pixel > in, std::span< Pixel > out)
void scale_6on1(std::span< const Pixel > in, std::span< Pixel > out)
void scale_1on2(std::span< const Pixel > in, std::span< Pixel > out)
void Scale_1on2(std::span< const Pixel > in, std::span< Pixel > out)
void scale_1on3(std::span< const Pixel > in, std::span< Pixel > out)
Scale_XonY functions Transforms an input line of pixel to an output line (possibly) with a different ...
void scale_17on20(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on3(std::span< const Pixel > in, std::span< Pixel > out)
void scale_3on2(std::span< const Pixel > in, std::span< Pixel > out)
void scale_3on8(std::span< const Pixel > in, std::span< Pixel > out)
void scale_2on9(std::span< const Pixel > in, std::span< Pixel > out)
void alphaBlendLines(std::span< const Pixel > in1, std::span< const Pixel > in2, std::span< Pixel > out)
AlphaBlendLines functor Generate an output line that is a per-pixel-alpha-blend of the two input line...
void scale_9on10(std::span< const Pixel > in, std::span< Pixel > out)
void scale_1on4(std::span< const Pixel > in, std::span< Pixel > out)
CharacterConverter::Pixel Pixel
void scale_1on6(std::span< const Pixel > in, std::span< Pixel > out)
void scale_4on1(std::span< const Pixel > in, std::span< Pixel > out)
void scale_3on4(std::span< const Pixel > in, std::span< Pixel > out)
void scale_8on9(std::span< const Pixel > in, std::span< Pixel > out)
void scale_8on3(std::span< const Pixel > in, std::span< Pixel > out)
auto zip_equal(Ranges &&... ranges)
Definition view.hh:559
constexpr auto xrange(T e)
Definition xrange.hh:132