openMSX
DivModByConst.hh
Go to the documentation of this file.
1 #ifndef DIVMODBYCONST
2 #define DIVMODBYCONST
3 
4 #include "build-info.hh"
5 #include "Math.hh"
6 #include <type_traits>
7 #include <cstdint>
8 
21 
22 // Utility class to perform 128-bit by 128-bit division at compilation time
23 template<uint64_t RH, uint64_t RL, uint64_t QH, uint64_t QL, uint64_t DH, uint64_t DL, uint32_t BITS>
25 {
26  static const uint64_t QL2 = (QL << 1);
27  static const uint64_t QH2 = (QH << 1) + (QL2 < QL);
28  static const uint64_t RL2 = (RL << 1) + (QH2 < QH);
29  static const uint64_t RH2 = (RH << 1) + (RL2 < RL);
30 
31  static const bool C = (RH2 != DH) ? (RH2 < DH) : (RL2 < DL);
32  static const uint64_t RL3 = C ? RL2 : RL2 - DL;
33  static const uint64_t RH3 = C ? RH2 : RH2 - DH - (RL3 > RL2);
34  static const uint64_t QL3 = C ? QL2 : QL2 + 1;
35  static const uint64_t QH3 = C ? QH2 : ((QL3 != 0) ? QH2 : QH2 + 1);
36 
37  using Div = Div128_helper<RH3, RL3, QH3, QL3, DH, DL, BITS - 1>;
38  static const uint64_t quotientLow = Div::quotientLow;
39  static const uint64_t quotientHigh = Div::quotientHigh;
40  static const uint64_t remainderLow = Div::remainderLow;
41  static const uint64_t remainderHigh = Div::remainderHigh;
42 };
43 template<uint64_t RH, uint64_t RL, uint64_t QH, uint64_t QL, uint64_t DH, uint64_t DL>
44 struct Div128_helper<RH, RL, QH, QL, DH, DL, 0>
45 {
46  static const uint64_t quotientLow = QL;
47  static const uint64_t quotientHigh = QH;
48  static const uint64_t remainderLow = RL;
49  static const uint64_t remainderHigh = RH;
50 };
51 template<uint64_t DividendH, uint64_t DividendL, uint64_t DividerH, uint64_t DividerL>
52 struct Div128
53  : Div128_helper<0, 0, DividendH, DividendL, DividerH, DividerL, 128> {};
54 
55 
56 // equivalent to the following run-time loop:
57 // while (!(M & 1)) {
58 // M >>= 1;
59 // --S;
60 // }
61 template<uint64_t M, uint32_t S, bool B = M & 1> struct DBCReduce
62 {
63  using R2 = DBCReduce<M / 2, S - 1>;
64  static const uint64_t M2 = R2::M2;
65  static const uint32_t S2 = R2::S2;
66 };
67 template<uint64_t M, uint32_t S> struct DBCReduce<M, S, true>
68 {
69  static const uint64_t M2 = M;
70  static const uint32_t S2 = S;
71 };
72 
73 // equivalent to the following run-tim loop:
74 // while (((m_low >> 1) < (m_high >> 1)) && (l > 0)) {
75 // m_low >>= 1;
76 // m_high >>= 1;
77 // --l;
78 // }
79 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL>
81 {
82  static const uint64_t AH2 = AH / 2;
83  static const uint64_t AL2 = AL / 2 + ((AH2 * 2 != AH) ? (1ull << 63) : 0);
84  static const uint64_t BH2 = BH / 2;
85  static const uint64_t BL2 = BL / 2 + ((BH2 * 2 != BH) ? (1ull << 63) : 0);
86 };
87 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t L>
89 {
91  static const bool C = (S::AH2 != S::BH2) ? (S::AH2 < S::BH2)
92  : (S::AL2 < S::BL2);
93  static const bool value = C && (L > 0);
94 };
95 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t LL, bool B>
97 {
99  using T = DBCReduce2Test<S::AH2, S::AL2, S::BH2, S::BL2, LL - 1>;
100  using R = DBCReduce2Loop<S::AH2, S::AL2, S::BH2, S::BL2, LL - 1, T::value>;
101  static const uint64_t MLH = R::MLH;
102  static const uint64_t MLL = R::MLL;
103  static const uint64_t MHH = R::MHH;
104  static const uint64_t MHL = R::MHL;
105  static const uint32_t L = R::L;
106 };
107 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t LL>
108 struct DBCReduce2Loop<AH, AL, BH, BL, LL, false>
109 {
110  static const uint64_t MLH = AH;
111  static const uint64_t MLL = AL;
112  static const uint64_t MHH = BH;
113  static const uint64_t MHL = BL;
114  static const uint32_t L = LL;
115 };
116 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t LL>
118 {
121  static const uint64_t MLH = R::MLH;
122  static const uint64_t MLL = R::MLL;
123  static const uint64_t MHH = R::MHH;
124  static const uint64_t MHL = R::MHL;
125  static const uint32_t L = R::L;
126 };
127 
128 
129 template<uint32_t S> struct DBCAlgo1
130 {
131  // division possible by only shifting
132  uint32_t operator()(uint64_t dividend) const
133  {
134  return dividend >> S;
135  }
136 };
137 
138 static inline uint64_t mla64(uint64_t a, uint64_t b, uint64_t c)
139 {
140  // equivalent to this:
141  // return (__uint128_t(a) * b + c) >> 64;
142  uint64_t t1 = uint64_t(uint32_t(a)) * uint32_t(b);
143  uint64_t t2 = (a >> 32) * uint32_t(b);
144  uint64_t t3 = uint32_t(a) * (b >> 32);
145  uint64_t t4 = (a >> 32) * (b >> 32);
146 
147  uint64_t s1 = uint64_t(uint32_t(c)) + uint32_t(t1);
148  uint64_t s2 = (s1 >> 32) + (c >> 32) + (t1 >> 32) + t2;
149  uint64_t s3 = uint64_t(uint32_t(s2)) + uint32_t(t3);
150  uint64_t s4 = (s3 >> 32) + (s2 >> 32) + (t3 >> 32) + t4;
151  return s4;
152 }
153 
154 template<uint64_t M, uint32_t S> struct DBCAlgo2
155 {
156  // division possible by multiplication and shift
157  uint32_t operator()(uint64_t dividend) const
158  {
159  using R = DBCReduce<M, S>;
160  uint64_t h = mla64(dividend, R::M2, 0);
161  uint64_t result = h >> R::S2;
162  #ifdef DEBUG
163  // we don't even want this overhead in devel builds
164  assert(result == uint32_t(result));
165  #endif
166  return uint32_t(result);
167  }
168 };
169 
170 template<uint32_t DIVISOR, uint32_t N> struct DBCAlgo3
171 {
172  // division possible by multiplication, addition and shift
173  static const uint32_t S = Math::log2p1(DIVISOR) - 1;
175  static const uint64_t M = D::quotientLow + (D::remainderLow > (DIVISOR / 2));
176 
177  uint32_t operator()(uint64_t dividend) const
178  {
179  using R = DBCReduce<M, S + N>;
180  uint64_t h = mla64(dividend, R::M2, R::M2);
181  return h >> R::S2;
182  }
183 };
184 
185 
186 template<uint32_t DIVISOR, uint32_t N, typename RM> struct DBCHelper3
187  : std::conditional_t<RM::MHH == 0, DBCAlgo2<RM::MHL, N + RM::L>,
188  DBCAlgo3<DIVISOR, N>> {};
189 
190 template<uint32_t DIVISOR, uint32_t N> struct DBCHelper2
191 {
192  static const uint32_t L = Math::log2p1(DIVISOR);
193  static const uint64_t J = 0xffffffffffffffffull % DIVISOR;
194  using K = Div128<1 << L, 0, 0, 0xffffffffffffffffull - J>;
195 
198  using R = DBCReduce2<M_LOW ::quotientHigh, M_LOW ::quotientLow,
199  M_HIGH::quotientHigh, M_HIGH::quotientLow, L>;
200 
201  uint32_t operator()(uint64_t dividend) const
202  {
204  return dbc(dividend);
205  }
206 };
207 
208 template<uint32_t DIVISOR, uint32_t SHIFT> struct DBCHelper1
209  : std::conditional_t<DIVISOR == 1,
210  DBCAlgo1<SHIFT>,
211  std::conditional_t<DIVISOR & 1,
212  DBCHelper2<DIVISOR, SHIFT>,
213  DBCHelper1<DIVISOR / 2, SHIFT + 1>>> {};
214 
215 } // namespace DivModByConstPrivate
216 
217 
218 template<uint32_t DIVISOR> struct DivModByConst
219 {
220  uint32_t div(uint64_t dividend) const
221  {
222  #ifdef __x86_64
223  // on 64-bit CPUs gcc already does this
224  // optimization (and better)
225  return uint32_t(dividend / DIVISOR);
226  #else
228  return dbc(dividend);
229  #endif
230  }
231 
232  uint32_t mod(uint64_t dividend) const
233  {
234  uint64_t result;
235  #ifdef __x86_64
236  result = dividend % DIVISOR;
237  #else
238  result = dividend - DIVISOR * div(dividend);
239  #endif
240  #ifdef DEBUG
241  // we don't even want this overhead in devel builds
242  assert(result == uint32_t(result));
243  #endif
244  return uint32_t(result);
245  }
246 };
247 
248 #endif // DIVMODBYCONST
uint32_t operator()(uint64_t dividend) const
static const uint64_t quotientHigh
uint32_t mod(uint64_t dividend) const
Definition: stl_test.cc:7
static const uint64_t remainderHigh
uint32_t operator()(uint64_t dividend) const
uint32_t div(uint64_t dividend) const
uint32_t operator()(uint64_t dividend) const
static const uint64_t remainderLow
uint32_t operator()(uint64_t dividend) const
Utility class to optimize 64-bit divide/module by a 32-bit constant.
constexpr T log2p1(T x) noexcept
Returns the number of bits needed to store the value &#39;x&#39;, that is: for x==0 : 0 for x!=0 : 1 + floor(...
Definition: Math.hh:38
static const uint64_t quotientLow