openMSX
DivModByConst.hh
Go to the documentation of this file.
1 #ifndef DIVMODBYCONST
2 #define DIVMODBYCONST
3 
4 #include "build-info.hh"
5 #include <type_traits>
6 #include <cstdint>
7 
20 
21 template<uint32_t A, uint32_t R = 0> struct log2
22  : std::conditional_t<A == 0, std::integral_constant<int, R>,
23  log2<A / 2, R + 1>> {};
24 
25 // Utility class to perform 128-bit by 128-bit division at compilation time
26 template<uint64_t RH, uint64_t RL, uint64_t QH, uint64_t QL, uint64_t DH, uint64_t DL, uint32_t BITS>
28 {
29  static const uint64_t QL2 = (QL << 1);
30  static const uint64_t QH2 = (QH << 1) + (QL2 < QL);
31  static const uint64_t RL2 = (RL << 1) + (QH2 < QH);
32  static const uint64_t RH2 = (RH << 1) + (RL2 < RL);
33 
34  static const bool C = (RH2 != DH) ? (RH2 < DH) : (RL2 < DL);
35  static const uint64_t RL3 = C ? RL2 : RL2 - DL;
36  static const uint64_t RH3 = C ? RH2 : RH2 - DH - (RL3 > RL2);
37  static const uint64_t QL3 = C ? QL2 : QL2 + 1;
38  static const uint64_t QH3 = C ? QH2 : ((QL3 != 0) ? QH2 : QH2 + 1);
39 
40  using Div = Div128_helper<RH3, RL3, QH3, QL3, DH, DL, BITS - 1>;
41  static const uint64_t quotientLow = Div::quotientLow;
42  static const uint64_t quotientHigh = Div::quotientHigh;
43  static const uint64_t remainderLow = Div::remainderLow;
44  static const uint64_t remainderHigh = Div::remainderHigh;
45 };
46 template<uint64_t RH, uint64_t RL, uint64_t QH, uint64_t QL, uint64_t DH, uint64_t DL>
47 struct Div128_helper<RH, RL, QH, QL, DH, DL, 0>
48 {
49  static const uint64_t quotientLow = QL;
50  static const uint64_t quotientHigh = QH;
51  static const uint64_t remainderLow = RL;
52  static const uint64_t remainderHigh = RH;
53 };
54 template<uint64_t DividendH, uint64_t DividendL, uint64_t DividerH, uint64_t DividerL>
55 struct Div128
56  : Div128_helper<0, 0, DividendH, DividendL, DividerH, DividerL, 128> {};
57 
58 
59 // equivalent to the following run-time loop:
60 // while (!(M & 1)) {
61 // M >>= 1;
62 // --S;
63 // }
64 template<uint64_t M, uint32_t S, bool B = M & 1> struct DBCReduce
65 {
66  using R2 = DBCReduce<M / 2, S - 1>;
67  static const uint64_t M2 = R2::M2;
68  static const uint32_t S2 = R2::S2;
69 };
70 template<uint64_t M, uint32_t S> struct DBCReduce<M, S, true>
71 {
72  static const uint64_t M2 = M;
73  static const uint32_t S2 = S;
74 };
75 
76 // equivalent to the following run-tim loop:
77 // while (((m_low >> 1) < (m_high >> 1)) && (l > 0)) {
78 // m_low >>= 1;
79 // m_high >>= 1;
80 // --l;
81 // }
82 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL>
84 {
85  static const uint64_t AH2 = AH / 2;
86  static const uint64_t AL2 = AL / 2 + ((AH2 * 2 != AH) ? (1ull << 63) : 0);
87  static const uint64_t BH2 = BH / 2;
88  static const uint64_t BL2 = BL / 2 + ((BH2 * 2 != BH) ? (1ull << 63) : 0);
89 };
90 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t L>
92 {
94  static const bool C = (S::AH2 != S::BH2) ? (S::AH2 < S::BH2)
95  : (S::AL2 < S::BL2);
96  static const bool value = C && (L > 0);
97 };
98 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t LL, bool B>
100 {
102  using T = DBCReduce2Test<S::AH2, S::AL2, S::BH2, S::BL2, LL - 1>;
103  using R = DBCReduce2Loop<S::AH2, S::AL2, S::BH2, S::BL2, LL - 1, T::value>;
104  static const uint64_t MLH = R::MLH;
105  static const uint64_t MLL = R::MLL;
106  static const uint64_t MHH = R::MHH;
107  static const uint64_t MHL = R::MHL;
108  static const uint32_t L = R::L;
109 };
110 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t LL>
111 struct DBCReduce2Loop<AH, AL, BH, BL, LL, false>
112 {
113  static const uint64_t MLH = AH;
114  static const uint64_t MLL = AL;
115  static const uint64_t MHH = BH;
116  static const uint64_t MHL = BL;
117  static const uint32_t L = LL;
118 };
119 template<uint64_t AH, uint64_t AL, uint64_t BH, uint64_t BL, uint32_t LL>
121 {
124  static const uint64_t MLH = R::MLH;
125  static const uint64_t MLL = R::MLL;
126  static const uint64_t MHH = R::MHH;
127  static const uint64_t MHL = R::MHL;
128  static const uint32_t L = R::L;
129 };
130 
131 
132 template<uint32_t S> struct DBCAlgo1
133 {
134  // division possible by only shifting
135  uint32_t operator()(uint64_t dividend) const
136  {
137  return dividend >> S;
138  }
139 };
140 
141 static inline uint64_t mla64(uint64_t a, uint64_t b, uint64_t c)
142 {
143  // equivalent to this:
144  // return (__uint128_t(a) * b + c) >> 64;
145  uint64_t t1 = uint64_t(uint32_t(a)) * uint32_t(b);
146  uint64_t t2 = (a >> 32) * uint32_t(b);
147  uint64_t t3 = uint32_t(a) * (b >> 32);
148  uint64_t t4 = (a >> 32) * (b >> 32);
149 
150  uint64_t s1 = uint64_t(uint32_t(c)) + uint32_t(t1);
151  uint64_t s2 = (s1 >> 32) + (c >> 32) + (t1 >> 32) + t2;
152  uint64_t s3 = uint64_t(uint32_t(s2)) + uint32_t(t3);
153  uint64_t s4 = (s3 >> 32) + (s2 >> 32) + (t3 >> 32) + t4;
154  return s4;
155 }
156 
157 template<uint64_t M, uint32_t S> struct DBCAlgo2
158 {
159  // division possible by multiplication and shift
160  uint32_t operator()(uint64_t dividend) const
161  {
162  using R = DBCReduce<M, S>;
163  uint64_t h = mla64(dividend, R::M2, 0);
164  uint64_t result = h >> R::S2;
165  #ifdef DEBUG
166  // we don't even want this overhead in devel builds
167  assert(result == uint32_t(result));
168  #endif
169  return uint32_t(result);
170  }
171 };
172 
173 template<uint32_t DIVISOR, uint32_t N> struct DBCAlgo3
174 {
175  // division possible by multiplication, addition and shift
176  static const uint32_t S = log2<DIVISOR>::value - 1;
178  static const uint64_t M = D::quotientLow + (D::remainderLow > (DIVISOR / 2));
179 
180  uint32_t operator()(uint64_t dividend) const
181  {
182  using R = DBCReduce<M, S + N>;
183  uint64_t h = mla64(dividend, R::M2, R::M2);
184  return h >> R::S2;
185  }
186 };
187 
188 
189 template<uint32_t DIVISOR, uint32_t N, typename RM> struct DBCHelper3
190  : std::conditional_t<RM::MHH == 0, DBCAlgo2<RM::MHL, N + RM::L>,
191  DBCAlgo3<DIVISOR, N>> {};
192 
193 template<uint32_t DIVISOR, uint32_t N> struct DBCHelper2
194 {
195  static const uint32_t L = log2<DIVISOR>::value;
196  static const uint64_t J = 0xffffffffffffffffull % DIVISOR;
197  using K = Div128<1 << L, 0, 0, 0xffffffffffffffffull - J>;
198 
201  using R = DBCReduce2<M_LOW ::quotientHigh, M_LOW ::quotientLow,
202  M_HIGH::quotientHigh, M_HIGH::quotientLow, L>;
203 
204  uint32_t operator()(uint64_t dividend) const
205  {
207  return dbc(dividend);
208  }
209 };
210 
211 template<uint32_t DIVISOR, uint32_t SHIFT> struct DBCHelper1
212  : std::conditional_t<DIVISOR == 1,
213  DBCAlgo1<SHIFT>,
214  std::conditional_t<DIVISOR & 1,
215  DBCHelper2<DIVISOR, SHIFT>,
216  DBCHelper1<DIVISOR / 2, SHIFT + 1>>> {};
217 
218 } // namespace DivModByConstPrivate
219 
220 
221 template<uint32_t DIVISOR> struct DivModByConst
222 {
223  uint32_t div(uint64_t dividend) const
224  {
225  #ifdef __x86_64
226  // on 64-bit CPUs gcc already does this
227  // optimization (and better)
228  return uint32_t(dividend / DIVISOR);
229  #else
231  return dbc(dividend);
232  #endif
233  }
234 
235  uint32_t mod(uint64_t dividend) const
236  {
237  uint64_t result;
238  #ifdef __x86_64
239  result = dividend % DIVISOR;
240  #else
241  result = dividend - DIVISOR * div(dividend);
242  #endif
243  #ifdef DEBUG
244  // we don't even want this overhead in devel builds
245  assert(result == uint32_t(result));
246  #endif
247  return uint32_t(result);
248  }
249 };
250 
251 #endif // DIVMODBYCONST
uint32_t operator()(uint64_t dividend) const
uint32_t mod(uint64_t dividend) const
Definition: stl_test.cc:7
uint32_t operator()(uint64_t dividend) const
uint32_t div(uint64_t dividend) const
uint32_t operator()(uint64_t dividend) const
uint32_t operator()(uint64_t dividend) const
Utility class to optimize 64-bit divide/module by a 32-bit constant.