openMSX
DivModByConst.hh
Go to the documentation of this file.
1#ifndef DIVMODBYCONST_HH
2#define DIVMODBYCONST_HH
3
4#include "uint128.hh"
5#include <bit>
6#include <cstdint>
7#include <type_traits>
8
21
23 uint32_t divisor, shift;
24};
25[[nodiscard]] constexpr Reduce0Result reduce0(uint32_t divisor)
26{
27 uint32_t shift = 0;
28 while ((divisor & 1) == 0) {
29 divisor >>= 1;
30 ++shift;
31 }
32 return {divisor, shift};
33}
34
36 uint64_t m;
37 uint32_t s;
38};
39[[nodiscard]] constexpr Reduce1Result reduce1(uint64_t m, uint32_t s)
40{
41 while (!(m & 1)) {
42 m >>= 1;
43 --s;
44 }
45 return {m, s};
46}
47
50 uint32_t l;
51};
52[[nodiscard]] constexpr Reduce2Result reduce2(uint128 m_low, uint128 m_high, uint32_t l)
53{
54 while (((m_low >> 1) < (m_high >> 1)) && (l > 0)) {
55 m_low >>= 1;
56 m_high >>= 1;
57 --l;
58 }
59 return {m_low, m_high, l};
60}
61
62
63[[nodiscard]] static constexpr uint64_t mla64(uint64_t a, uint64_t b, uint64_t c)
64{
65 // equivalent to this:
66 // return (__uint128_t(a) * b + c) >> 64;
67 uint64_t t1 = uint64_t(uint32_t(a)) * uint32_t(b);
68 uint64_t t2 = (a >> 32) * uint32_t(b);
69 uint64_t t3 = uint32_t(a) * (b >> 32);
70 uint64_t t4 = (a >> 32) * (b >> 32);
71
72 uint64_t s1 = uint64_t(uint32_t(c)) + uint32_t(t1);
73 uint64_t s2 = (s1 >> 32) + (c >> 32) + (t1 >> 32) + t2;
74 uint64_t s3 = uint64_t(uint32_t(s2)) + uint32_t(t3);
75 uint64_t s4 = (s3 >> 32) + (s2 >> 32) + (t3 >> 32) + t4;
76 return s4;
77}
78
79template<uint32_t DIVISOR>
80[[nodiscard]] constexpr auto getAlgorithm()
81{
82 // reduce to odd divisor
83 constexpr auto r0 = reduce0(DIVISOR);
84 if constexpr (r0.divisor == 1) {
85 // algorithm 1: division possible by only shifting
86 constexpr uint32_t shift = r0.shift;
87 return [=](uint64_t dividend) {
88 return dividend >> shift;
89 };
90 } else {
91 constexpr uint32_t L = std::bit_width(r0.divisor);
92 constexpr uint64_t J = 0xffffffffffffffffull % r0.divisor;
93 constexpr uint128 L64 = uint128(1) << (L + 64);
94 constexpr uint128 k = L64 / (0xffffffffffffffffull - J);
95 constexpr uint128 m_low = L64 / r0.divisor;
96 constexpr uint128 m_high = (L64 + k) / r0.divisor;
97 constexpr auto r2 = reduce2(m_low, m_high, L);
98
99 if constexpr (high64(r2.m_high) == 0) {
100 // algorithm 2: division possible by multiplication and shift
101 constexpr auto r1 = reduce1(low64(r2.m_high), r0.shift + r2.l);
102
103 constexpr uint64_t mul = r1.m;
104 constexpr uint32_t shift = r1.s;
105 return [=](uint64_t dividend) {
106 return mla64(dividend, mul, 0) >> shift;
107 };
108 } else {
109 // algorithm 3: division possible by multiplication, addition and shift
110 constexpr uint32_t S = std::bit_width(r0.divisor) - 1;
111 constexpr uint128 S64 = uint128(1) << (S + 64);
112 constexpr uint128 dq = S64 / r0.divisor;
113 constexpr uint128 dr = S64 % r0.divisor;
114 constexpr uint64_t M = low64(dq) + (low64(dr) > (r0.divisor / 2));
115 constexpr auto r1 = reduce1(M, S + r0.shift);
116
117 constexpr uint64_t mul = r1.m;
118 constexpr uint32_t shift = r1.s;
119 return [=](uint64_t dividend) {
120 return mla64(dividend, mul, mul) >> shift;
121 };
122 }
123 }
124}
125
126} // namespace DivModByConstPrivate
127
128
129template<uint32_t DIVISOR> struct DivModByConst
130{
131 [[nodiscard]] constexpr uint32_t div(uint64_t dividend) const
132 {
133 #ifdef __x86_64
134 // on 64-bit CPUs gcc already does this
135 // optimization (and better)
136 uint64_t result = dividend / DIVISOR;
137 #else
138 auto algorithm = DivModByConstPrivate::getAlgorithm<DIVISOR>();
139 uint64_t result = algorithm(dividend);
140 #endif
141 #ifdef DEBUG
142 // we don't even want this overhead in devel builds
143 assert(result == uint32_t(result));
144 #endif
145 return uint32_t(result);
146 }
147
148 [[nodiscard]] constexpr uint32_t mod(uint64_t dividend) const
149 {
150 #ifdef __x86_64
151 uint64_t result = dividend % DIVISOR;
152 #else
153 uint64_t result = dividend - DIVISOR * div(dividend);
154 #endif
155 #ifdef DEBUG
156 // we don't even want this overhead in devel builds
157 assert(result == uint32_t(result));
158 #endif
159 return uint32_t(result);
160 }
161};
162
163#endif // DIVMODBYCONST_HH
Unsigned 128-bit integer type.
Definition: uint128.hh:24
Utility class to optimize 64-bit divide/module by a 32-bit constant.
constexpr auto getAlgorithm()
constexpr Reduce1Result reduce1(uint64_t m, uint32_t s)
constexpr Reduce2Result reduce2(uint128 m_low, uint128 m_high, uint32_t l)
constexpr Reduce0Result reduce0(uint32_t divisor)
EndianT< uint64_t, ConvLittle< BIG > > L64
Definition: endian.hh:121
constexpr uint32_t div(uint64_t dividend) const
constexpr uint32_t mod(uint64_t dividend) const
Definition: stl_test.cc:7