#include <emmintrin.h> // SSE2 intrinsics
#include <tmmintrin.h> // SSSE3 intrinsics
// Compile-time trait: true when CLASS derives from (or is) TAG.
// Inherits its 'value' member from std::is_base_of, so it can be used
// like any standard type-trait (IsTagged<C, T>::value).
template<typename CLASS, typename TAG>
struct IsTagged : std::is_base_of<TAG, CLASS> {};
226 template<std::
unsigned_
integral Pixel,
unsigned w1 = 1,
unsigned w2 = 1>
class BlendLines
231 Pixel* out,
size_t width);
238 template<std::
unsigned_
integral Pixel>
244 Pixel* out,
unsigned outWidth)
const;
263 Pixel* out,
size_t width);
265 Pixel* out,
size_t width);
283 template<std::
unsigned_
integral Pixel>
302 [[nodiscard]]
virtual bool isCopy()
const = 0;
311 template<std::
unsigned_
integral Pixel,
typename Scaler>
325 scaler(in, out, outWidth);
327 [[nodiscard]]
bool isCopy()
const override
338 template<std::
unsigned_
integral Pixel,
typename Scaler>
348 scaler(in, out, outWidth);
350 [[nodiscard]]
bool isCopy()
const override
361 template<std::
unsigned_
integral Pixel,
unsigned N>
362 static inline void scale_1onN(
363 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
366 for (; i < (width - (
N - 1)); i +=
N, j += 1) {
373 if ((i + k) < width) out[i + k] = 0;
377 template<std::
unsigned_
integral Pixel>
380 scale_1onN<Pixel, 3>(in, out, width);
383 template<std::
unsigned_
integral Pixel>
386 scale_1onN<Pixel, 4>(in, out, width);
389 template<std::
unsigned_
integral Pixel>
392 scale_1onN<Pixel, 6>(in, out, width);
396 template<std::
unsigned_
integral Pixel>
inline __m128i unpacklo(__m128i
x, __m128i y)
398 if constexpr (
sizeof(
Pixel) == 4) {
399 return _mm_unpacklo_epi32(
x, y);
400 }
else if constexpr (
sizeof(
Pixel) == 2) {
401 return _mm_unpacklo_epi16(
x, y);
406 template<std::
unsigned_
integral Pixel>
inline __m128i unpackhi(__m128i
x, __m128i y)
408 if constexpr (
sizeof(
Pixel) == 4) {
409 return _mm_unpackhi_epi32(
x, y);
410 }
else if constexpr (
sizeof(
Pixel) == 2) {
411 return _mm_unpackhi_epi16(
x, y);
417 template<std::
unsigned_
integral Pixel>
418 inline void scale_1on2_SSE(
const Pixel* in_,
Pixel* out_,
size_t srcWidth)
420 size_t bytes = srcWidth *
sizeof(
Pixel);
421 assert((bytes % (4 *
sizeof(__m128i))) == 0);
424 const auto* in =
reinterpret_cast<const char*
>(in_) + bytes;
425 auto* out =
reinterpret_cast< char*
>(out_) + 2 * bytes;
427 auto x = -ptrdiff_t(bytes);
429 __m128i a0 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in +
x + 0));
430 __m128i a1 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in +
x + 16));
431 __m128i a2 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in +
x + 32));
432 __m128i a3 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in +
x + 48));
433 __m128i l0 = unpacklo<Pixel>(a0, a0);
434 __m128i h0 = unpackhi<Pixel>(a0, a0);
435 __m128i l1 = unpacklo<Pixel>(a1, a1);
436 __m128i h1 = unpackhi<Pixel>(a1, a1);
437 __m128i l2 = unpacklo<Pixel>(a2, a2);
438 __m128i h2 = unpackhi<Pixel>(a2, a2);
439 __m128i
l3 = unpacklo<Pixel>(a3, a3);
440 __m128i h3 = unpackhi<Pixel>(a3, a3);
441 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 0), l0);
442 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 16), h0);
443 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 32), l1);
444 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 48), h1);
445 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 64), l2);
446 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 80), h2);
447 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 96),
l3);
448 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out + 2*
x + 112), h3);
449 x += 4 *
sizeof(__m128i);
454 template<std::
unsigned_
integral Pixel>
456 const Pixel* __restrict in,
Pixel* __restrict out,
size_t dstWidth)
470 size_t srcWidth = dstWidth / 2;
473 size_t chunk = 4 *
sizeof(__m128i) /
sizeof(
Pixel);
474 size_t srcWidth2 = srcWidth & ~(chunk - 1);
475 scale_1on2_SSE(in, out, srcWidth2);
477 out += 2 * srcWidth2;
478 srcWidth -= srcWidth2;
483 for (
auto x :
xrange(srcWidth)) {
484 out[
x * 2] = out[
x * 2 + 1] = in[
x];
// Copy 'size' bytes from 'in_' to 'out_' using SSE registers,
// 128 bytes (8 registers) per iteration.
// Preconditions: both pointers 16-byte aligned, 'size' a multiple of
// 128 (size == 0 is fine: the loop simply runs zero times).
inline void memcpy_SSE_128(
	const void* __restrict in_, void* __restrict out_, size_t size)
{
	assert((reinterpret_cast<size_t>(in_ ) % 16) == 0);
	assert((reinterpret_cast<size_t>(out_) % 16) == 0);
	assert((size % 128) == 0);

	const auto* in  = reinterpret_cast<const __m128i*>(in_);
	      auto* out = reinterpret_cast<      __m128i*>(out_);
	const auto* end = in + (size / sizeof(__m128i));
	while (in != end) {
		// Aligned loads/stores are valid per the asserts above.
		_mm_store_si128(out + 0, _mm_load_si128(in + 0));
		_mm_store_si128(out + 1, _mm_load_si128(in + 1));
		_mm_store_si128(out + 2, _mm_load_si128(in + 2));
		_mm_store_si128(out + 3, _mm_load_si128(in + 3));
		_mm_store_si128(out + 4, _mm_load_si128(in + 4));
		_mm_store_si128(out + 5, _mm_load_si128(in + 5));
		_mm_store_si128(out + 6, _mm_load_si128(in + 6));
		_mm_store_si128(out + 7, _mm_load_si128(in + 7));
		in  += 8;
		out += 8;
	}
}
518 template<std::
unsigned_
integral Pixel>
520 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
522 size_t nBytes = width *
sizeof(
Pixel);
528 size_t n128 = nBytes & ~127;
529 memcpy_SSE_128(in, out, n128);
531 if (nBytes == 0) [[likely]]
return;
532 in += n128 /
sizeof(
Pixel);
533 out += n128 /
sizeof(
Pixel);
536 memcpy(out, in, nBytes);
540 template<std::
unsigned_
integral Pixel>
542 : pixelOps(pixelOps_)
// Select four 32-bit lanes from the register pair (x, y) according to
// the _mm_shuffle_ps immediate IMM8: result lanes 0,1 come from 'x',
// lanes 2,3 from 'y'. Routed through the float shuffle because SSE2
// has no two-register 32-bit integer shuffle; the casts are bitwise
// no-ops.
template<int IMM8>
static inline __m128i shuffle(__m128i x, __m128i y)
{
	return _mm_castps_si128(_mm_shuffle_ps(
		_mm_castsi128_ps(x), _mm_castsi128_ps(y), IMM8));
}
553 template<std::
unsigned_
integral Pixel>
554 inline __m128i blend(__m128i
x, __m128i y,
Pixel mask)
556 if constexpr (
sizeof(
Pixel) == 4) {
558 __m128i p = shuffle<0x88>(
x, y);
559 __m128i q = shuffle<0xDD>(
x, y);
560 return _mm_avg_epu8(p, q);
565 const __m128i LL = _mm_set_epi8(
566 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
567 0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00);
568 const __m128i
HL = _mm_set_epi8(
569 0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00,
570 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
571 const __m128i LH = _mm_set_epi8(
572 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
573 0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02);
574 const __m128i HH = _mm_set_epi8(
575 0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02,
576 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
577 __m128i ll = _mm_shuffle_epi8(
x, LL);
578 __m128i hl = _mm_shuffle_epi8(y,
HL);
579 __m128i lh = _mm_shuffle_epi8(
x, LH);
580 __m128i hh = _mm_shuffle_epi8(y, HH);
581 __m128i p = _mm_or_si128(ll, hl);
582 __m128i q = _mm_or_si128(lh, hh);
586 __m128i s = _mm_unpacklo_epi16(
x, y);
587 __m128i
t = _mm_unpackhi_epi16(
x, y);
588 __m128i u = _mm_unpacklo_epi16(s,
t);
589 __m128i v = _mm_unpackhi_epi16(s,
t);
590 __m128i p = _mm_unpacklo_epi16(u, v);
591 __m128i q = _mm_unpackhi_epi16(u, v);
594 __m128i m = _mm_set1_epi16(
mask);
595 __m128i a = _mm_and_si128(p, q);
596 __m128i b = _mm_xor_si128(p, q);
597 __m128i c = _mm_and_si128(b, m);
598 __m128i d = _mm_srli_epi16(c, 1);
599 return _mm_add_epi16(a, d);
603 template<std::
unsigned_
integral Pixel>
604 inline void scale_2on1_SSE(
605 const Pixel* __restrict in_,
Pixel* __restrict out_,
size_t dstBytes,
608 assert((dstBytes % (4 *
sizeof(__m128i))) == 0);
609 assert(dstBytes != 0);
611 const auto* in =
reinterpret_cast<const char*
>(in_) + 2 * dstBytes;
612 auto* out =
reinterpret_cast< char*
>(out_) + dstBytes;
614 auto x = -ptrdiff_t(dstBytes);
616 __m128i a0 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 0));
617 __m128i a1 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 16));
618 __m128i a2 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 32));
619 __m128i a3 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 48));
620 __m128i a4 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 64));
621 __m128i a5 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 80));
622 __m128i a6 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 96));
623 __m128i a7 = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in + 2*
x + 112));
624 __m128i b0 = blend(a0, a1,
mask);
625 __m128i b1 = blend(a2, a3,
mask);
626 __m128i b2 = blend(a4, a5,
mask);
627 __m128i b3 = blend(a6, a7,
mask);
628 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out +
x + 0), b0);
629 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out +
x + 16), b1);
630 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out +
x + 32), b2);
631 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out +
x + 48), b3);
632 x += 4 *
sizeof(__m128i);
637 template<std::
unsigned_
integral Pixel>
639 const Pixel* __restrict in,
Pixel* __restrict out,
size_t dstWidth)
642 size_t n64 = (dstWidth *
sizeof(
Pixel)) & ~63;
644 scale_2on1_SSE(in, out, n64,
mask);
645 dstWidth &= ((64 /
sizeof(
Pixel)) - 1);
646 if (dstWidth == 0) [[likely]]
return;
647 in += (2 * n64) /
sizeof(
Pixel);
648 out += n64 /
sizeof(
Pixel);
652 for (
auto i :
xrange(dstWidth)) {
653 out[i] = pixelOps.template blend<1, 1>(
654 in[2 * i + 0], in[2 * i + 1]);
659 template<std::
unsigned_
integral Pixel>
661 : pixelOps(pixelOps_)
665 template<std::
unsigned_
integral Pixel>
667 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
669 for (
auto i :
xrange(width)) {
670 out[i] = pixelOps.template blend6<1, 1, 1, 1, 1, 1>(&in[6 * i]);
675 template<std::
unsigned_
integral Pixel>
677 : pixelOps(pixelOps_)
681 template<std::
unsigned_
integral Pixel>
683 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
685 for (
auto i :
xrange(width)) {
686 out[i] = pixelOps.template blend4<1, 1, 1, 1>(&in[4 * i]);
691 template<std::
unsigned_
integral Pixel>
693 : pixelOps(pixelOps_)
697 template<std::
unsigned_
integral Pixel>
699 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
701 for (
auto i :
xrange(width)) {
702 out[i] = pixelOps.template blend3<1, 1, 1>(&in[3 * i]);
707 template<std::
unsigned_
integral Pixel>
709 : pixelOps(pixelOps_)
713 template<std::
unsigned_
integral Pixel>
715 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
718 for (; i < (width - 1); i += 2, j += 3) {
719 out[i + 0] = pixelOps.template blend2<2, 1>(&in[j + 0]);
720 out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 1]);
722 if (i < width) out[i] = 0;
726 template<std::
unsigned_
integral Pixel>
728 : pixelOps(pixelOps_)
732 template<std::
unsigned_
integral Pixel>
734 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
737 for (; i < (width - 3); i += 4, j += 3) {
738 out[i + 0] = in[j + 0];
739 out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 0]);
740 out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 1]);
741 out[i + 3] = in[j + 2];
743 for (
auto k :
xrange(4 - 1)) {
744 if ((i + k) < width) out[i + k] = 0;
749 template<std::
unsigned_
integral Pixel>
751 : pixelOps(pixelOps_)
755 template<std::
unsigned_
integral Pixel>
757 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
760 for (; i < (width - 7); i += 8, j += 3) {
761 out[i + 0] = in[j + 0];
762 out[i + 1] = in[j + 0];
763 out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 0]);
764 out[i + 3] = in[j + 1];
765 out[i + 4] = in[j + 1];
766 out[i + 5] = pixelOps.template blend2<1, 2>(&in[j + 1]);
767 out[i + 6] = in[j + 2];
768 out[i + 7] = in[j + 2];
770 for (
auto k :
xrange(8 - 1)) {
771 if ((i + k) < width) out[i + k] = 0;
776 template<std::
unsigned_
integral Pixel>
778 : pixelOps(pixelOps_)
782 template<std::
unsigned_
integral Pixel>
784 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
787 for (; i < (width - 2); i += 3, j += 2) {
788 out[i + 0] = in[j + 0];
789 out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 0]);
790 out[i + 2] = in[j + 1];
792 if ((i + 0) < width) out[i + 0] = 0;
793 if ((i + 1) < width) out[i + 1] = 0;
797 template<std::
unsigned_
integral Pixel>
799 : pixelOps(pixelOps_)
803 template<std::
unsigned_
integral Pixel>
805 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
808 for (; i < (width - 2); i += 3, j += 4) {
809 out[i + 0] = pixelOps.template blend2<3, 1>(&in[j + 0]);
810 out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 1]);
811 out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 2]);
813 if ((i + 0) < width) out[i + 0] = 0;
814 if ((i + 1) < width) out[i + 1] = 0;
818 template<std::
unsigned_
integral Pixel>
820 : pixelOps(pixelOps_)
824 template<std::
unsigned_
integral Pixel>
826 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
829 for (; i < (width - 2); i += 3, j += 8) {
830 out[i + 0] = pixelOps.template blend3<3, 3, 2> (&in[j + 0]);
831 out[i + 1] = pixelOps.template blend4<1, 3, 3, 1>(&in[j + 2]);
832 out[i + 2] = pixelOps.template blend3<2, 3, 3> (&in[j + 5]);
834 if ((i + 0) < width) out[i + 0] = 0;
835 if ((i + 1) < width) out[i + 1] = 0;
839 template<std::
unsigned_
integral Pixel>
841 : pixelOps(pixelOps_)
845 template<std::
unsigned_
integral Pixel>
847 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
850 for (; i < (width - 8); i += 9, j += 2) {
851 out[i + 0] = in[j + 0];
852 out[i + 1] = in[j + 0];
853 out[i + 2] = in[j + 0];
854 out[i + 3] = in[j + 0];
855 out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 0]);
856 out[i + 5] = in[j + 1];
857 out[i + 6] = in[j + 1];
858 out[i + 7] = in[j + 1];
859 out[i + 8] = in[j + 1];
861 if ((i + 0) < width) out[i + 0] = 0;
862 if ((i + 1) < width) out[i + 1] = 0;
863 if ((i + 2) < width) out[i + 2] = 0;
864 if ((i + 3) < width) out[i + 3] = 0;
865 if ((i + 4) < width) out[i + 4] = 0;
866 if ((i + 5) < width) out[i + 5] = 0;
867 if ((i + 6) < width) out[i + 6] = 0;
868 if ((i + 7) < width) out[i + 7] = 0;
872 template<std::
unsigned_
integral Pixel>
874 : pixelOps(pixelOps_)
878 template<std::
unsigned_
integral Pixel>
880 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
883 for (; i < (width - 8); i += 9, j += 4) {
884 out[i + 0] = in[j + 0];
885 out[i + 1] = in[j + 0];
886 out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 0]);
887 out[i + 3] = in[j + 1];
888 out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 1]);
889 out[i + 5] = in[j + 2];
890 out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 2]);
891 out[i + 7] = in[j + 3];
892 out[i + 8] = in[j + 3];
894 if ((i + 0) < width) out[i + 0] = 0;
895 if ((i + 1) < width) out[i + 1] = 0;
896 if ((i + 2) < width) out[i + 2] = 0;
897 if ((i + 3) < width) out[i + 3] = 0;
898 if ((i + 4) < width) out[i + 4] = 0;
899 if ((i + 5) < width) out[i + 5] = 0;
900 if ((i + 6) < width) out[i + 6] = 0;
901 if ((i + 7) < width) out[i + 7] = 0;
905 template<std::
unsigned_
integral Pixel>
907 : pixelOps(pixelOps_)
911 template<std::
unsigned_
integral Pixel>
913 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
916 for (; i < (width - 8); i += 9, j += 8) {
917 out[i + 0] = in[j + 0];
918 out[i + 1] = pixelOps.template blend2<1, 7>(&in[j + 0]);
919 out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 1]);
920 out[i + 3] = pixelOps.template blend2<3, 5>(&in[j + 2]);
921 out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 3]);
922 out[i + 5] = pixelOps.template blend2<5, 3>(&in[j + 4]);
923 out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 5]);
924 out[i + 7] = pixelOps.template blend2<7, 1>(&in[j + 6]);
925 out[i + 8] = in[j + 7];
927 if ((i + 0) < width) out[i + 0] = 0;
928 if ((i + 1) < width) out[i + 1] = 0;
929 if ((i + 2) < width) out[i + 2] = 0;
930 if ((i + 3) < width) out[i + 3] = 0;
931 if ((i + 4) < width) out[i + 4] = 0;
932 if ((i + 5) < width) out[i + 5] = 0;
933 if ((i + 6) < width) out[i + 6] = 0;
934 if ((i + 7) < width) out[i + 7] = 0;
938 template<std::
unsigned_
integral Pixel>
940 : pixelOps(pixelOps_)
944 template<std::
unsigned_
integral Pixel>
946 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
948 assert((width % 5) == 0);
949 for (
size_t i = 0, j = 0; i < width; i += 5, j += 4) {
950 out[i + 0] = in[j + 0];
951 out[i + 1] = pixelOps.template blend2<1, 3>(&in[j + 0]);
952 out[i + 2] = pixelOps.template blend2<1, 1>(&in[j + 1]);
953 out[i + 3] = pixelOps.template blend2<3, 1>(&in[j + 2]);
954 out[i + 4] = in[j + 3];
959 template<std::
unsigned_
integral Pixel>
961 : pixelOps(pixelOps_)
965 template<std::
unsigned_
integral Pixel>
967 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
969 assert((width % 8) == 0);
970 for (
size_t i = 0, j = 0; i < width; i += 8, j += 7) {
971 out[i + 0] = in[j + 0];
972 out[i + 1] = pixelOps.template blend2<1, 6>(&in[j + 0]);
973 out[i + 2] = pixelOps.template blend2<2, 5>(&in[j + 1]);
974 out[i + 3] = pixelOps.template blend2<3, 4>(&in[j + 2]);
975 out[i + 4] = pixelOps.template blend2<4, 3>(&in[j + 3]);
976 out[i + 5] = pixelOps.template blend2<5, 2>(&in[j + 4]);
977 out[i + 6] = pixelOps.template blend2<6, 1>(&in[j + 5]);
978 out[i + 7] = in[j + 6];
983 template<std::
unsigned_
integral Pixel>
985 : pixelOps(pixelOps_)
989 template<std::
unsigned_
integral Pixel>
991 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
993 assert((width % 20) == 0);
994 for (
size_t i = 0, j = 0; i < width; i += 20, j += 17) {
995 out[i + 0] = in[j + 0];
996 out[i + 1] = pixelOps.template blend2< 3, 14>(&in[j + 0]);
997 out[i + 2] = pixelOps.template blend2< 6, 11>(&in[j + 1]);
998 out[i + 3] = pixelOps.template blend2< 9, 8>(&in[j + 2]);
999 out[i + 4] = pixelOps.template blend2<12, 5>(&in[j + 3]);
1000 out[i + 5] = pixelOps.template blend2<15, 2>(&in[j + 4]);
1001 out[i + 6] = in[j + 5];
1002 out[i + 7] = pixelOps.template blend2< 1, 16>(&in[j + 5]);
1003 out[i + 8] = pixelOps.template blend2< 4, 13>(&in[j + 6]);
1004 out[i + 9] = pixelOps.template blend2< 7, 10>(&in[j + 7]);
1005 out[i + 10] = pixelOps.template blend2<10, 7>(&in[j + 8]);
1006 out[i + 11] = pixelOps.template blend2<13, 4>(&in[j + 9]);
1007 out[i + 12] = pixelOps.template blend2<16, 1>(&in[j + 10]);
1008 out[i + 13] = in[j + 11];
1009 out[i + 14] = pixelOps.template blend2< 2, 15>(&in[j + 11]);
1010 out[i + 15] = pixelOps.template blend2< 5, 12>(&in[j + 12]);
1011 out[i + 16] = pixelOps.template blend2< 8, 9>(&in[j + 13]);
1012 out[i + 17] = pixelOps.template blend2<11, 6>(&in[j + 14]);
1013 out[i + 18] = pixelOps.template blend2<14, 3>(&in[j + 15]);
1014 out[i + 19] = in[j + 16];
1019 template<std::
unsigned_
integral Pixel>
1021 : pixelOps(pixelOps_)
1025 template<std::
unsigned_
integral Pixel>
1027 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width)
1029 assert((width % 10) == 0);
1030 for (
size_t i = 0, j = 0; i < width; i += 10, j += 9) {
1031 out[i + 0] = in[j + 0];
1032 out[i + 1] = pixelOps.template blend2<1, 8>(&in[j + 0]);
1033 out[i + 2] = pixelOps.template blend2<2, 7>(&in[j + 1]);
1034 out[i + 3] = pixelOps.template blend2<3, 6>(&in[j + 2]);
1035 out[i + 4] = pixelOps.template blend2<4, 5>(&in[j + 3]);
1036 out[i + 5] = pixelOps.template blend2<5, 4>(&in[j + 4]);
1037 out[i + 6] = pixelOps.template blend2<6, 3>(&in[j + 5]);
1038 out[i + 7] = pixelOps.template blend2<7, 2>(&in[j + 6]);
1039 out[i + 8] = pixelOps.template blend2<8, 1>(&in[j + 7]);
1040 out[i + 9] = in[j + 8];
1045 template<std::
unsigned_
integral Pixel,
unsigned w1,
unsigned w2>
1047 : pixelOps(pixelOps_)
1051 template<std::
unsigned_
integral Pixel,
unsigned w1,
unsigned w2>
1058 for (
auto i :
xrange(width)) {
1059 out[i] = pixelOps.template blend<w1, w2>(in1[i], in2[i]);
1064 template<std::
unsigned_
integral Pixel>
1066 : pixelOps(pixelOps_)
1070 template<std::
unsigned_
integral Pixel>
1072 const Pixel* in,
unsigned inWidth,
1073 Pixel* out,
unsigned outWidth)
const
1075 constexpr
unsigned FACTOR = 256;
1077 unsigned step = FACTOR * inWidth / outWidth;
1078 unsigned i = 0 * FACTOR;
1079 for (
auto o :
xrange(outWidth)) {
1080 Pixel p0 = in[(i / FACTOR) + 0];
1081 Pixel p1 = in[(i / FACTOR) + 1];
1082 out[o] = pixelOps.lerp(p0, p1, i % FACTOR);
1088 template<std::
unsigned_
integral Pixel>
1090 : pixelOps(pixelOps_)
1094 template<std::
unsigned_
integral Pixel>
1099 for (
auto i :
xrange(width)) {
1100 out[i] = pixelOps.alphaBlend(in1[i], in2[i]);
1104 template<std::
unsigned_
integral Pixel>
1112 assert(
sizeof(
Pixel) == 4);
1114 unsigned alpha = pixelOps.alpha(in1);
1122 Pixel in1M = pixelOps.multiply(in1, alpha);
1123 unsigned alpha2 = 256 - alpha;
1124 for (
auto i :
xrange(width)) {
1125 out[i] = in1M + pixelOps.multiply(in2[i], alpha2);
AlphaBlendLines functor Generate an output line that is a per-pixel-alpha-blend of the two input line...
AlphaBlendLines(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in1, const Pixel *in2, Pixel *out, size_t width)
BlendLines functor Generate an output line that is an interpolation of two input lines.
BlendLines(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in1, const Pixel *in2, Pixel *out, size_t width)
virtual bool isCopy() const =0
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
~PolyLineScaler()=default
virtual void operator()(const Pixel *in, Pixel *out, size_t outWidth)=0
Actually scale a line.
Like PolyScale above, but instead keeps a reference to the actual scaler.
bool isCopy() const override
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
void operator()(const Pixel *in, Pixel *out, size_t outWidth) override
Actually scale a line.
PolyScaleRef(Scaler &scaler_)
Polymorphic wrapper around another line scaler.
PolyScale(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t outWidth) override
Actually scale a line.
bool isCopy() const override
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
Scale_17on20(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_XonY functors Transforms an input line of pixel to an output line (possibly) with a different w...
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_2on1(PixelOperations< Pixel > pixelOps)
Scale_2on3(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_2on9(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_3on1(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_3on2(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_3on4(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_3on8(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_4on1(PixelOperations< Pixel > pixelOps)
Scale_4on3(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_4on5(PixelOperations< Pixel > pixelOps)
Scale_4on9(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_6on1(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_7on8(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_8on3(PixelOperations< Pixel > pixelOps)
Scale_8on9(PixelOperations< Pixel > pixelOps)
void operator()(const Pixel *in, Pixel *out, size_t width)
void operator()(const Pixel *in, Pixel *out, size_t width)
Scale_9on10(PixelOperations< Pixel > pixelOps)
Abstract base class for scalers.
Stretch (or zoom) a given input line to a wider output line.
void operator()(const Pixel *in, unsigned inWidth, Pixel *out, unsigned outWidth) const
ZoomLine(PixelOperations< Pixel > pixelOps)
imat3 l3(ivec3(0, 2, 3), ivec3(4, 5, 6), ivec3(7, 8, 9))
This file implements 3 utility functions:
constexpr KeyMatrixPosition x
Keyboard bindings.
constexpr nibble mask[4][13]
size_t size(std::string_view utf8)
constexpr auto xrange(T e)
constexpr auto end(const zstring_view &x)