openMSX
DivModByConst.hh
Go to the documentation of this file.
1 #ifndef DIVMODBYCONST_HH
2 #define DIVMODBYCONST_HH
3 
4 #include "Math.hh"
5 #include "uint128.hh"
6 #include <cstdint>
7 #include <type_traits>
8 
21 
22 struct Reduce0Result {
23  uint32_t divisor, shift;
24 };
25 constexpr Reduce0Result reduce0(uint32_t divisor)
26 {
27  uint32_t shift = 0;
28  while ((divisor & 1) == 0) {
29  divisor >>= 1;
30  ++shift;
31  }
32  return {divisor, shift};
33 }
34 
35 struct Reduce1Result {
36  uint64_t m;
37  uint32_t s;
38 };
39 constexpr Reduce1Result reduce1(uint64_t m, uint32_t s)
40 {
41  while (!(m & 1)) {
42  m >>= 1;
43  --s;
44  }
45  return {m, s};
46 }
47 
48 struct Reduce2Result {
50  uint32_t l;
51 };
52 constexpr Reduce2Result reduce2(uint128 m_low, uint128 m_high, uint32_t l)
53 {
54  while (((m_low >> 1) < (m_high >> 1)) && (l > 0)) {
55  m_low >>= 1;
56  m_high >>= 1;
57  --l;
58  }
59  return {m_low, m_high, l};
60 }
61 
62 
63 [[nodiscard]] static constexpr uint64_t mla64(uint64_t a, uint64_t b, uint64_t c)
64 {
65  // equivalent to this:
66  // return (__uint128_t(a) * b + c) >> 64;
67  uint64_t t1 = uint64_t(uint32_t(a)) * uint32_t(b);
68  uint64_t t2 = (a >> 32) * uint32_t(b);
69  uint64_t t3 = uint32_t(a) * (b >> 32);
70  uint64_t t4 = (a >> 32) * (b >> 32);
71 
72  uint64_t s1 = uint64_t(uint32_t(c)) + uint32_t(t1);
73  uint64_t s2 = (s1 >> 32) + (c >> 32) + (t1 >> 32) + t2;
74  uint64_t s3 = uint64_t(uint32_t(s2)) + uint32_t(t3);
75  uint64_t s4 = (s3 >> 32) + (s2 >> 32) + (t3 >> 32) + t4;
76  return s4;
77 }
78 
79 template<uint32_t DIVISOR>
80 [[nodiscard]] constexpr auto getAlgorithm()
81 {
82  // reduce to odd divisor
83  constexpr auto r0 = reduce0(DIVISOR);
84  if constexpr (r0.divisor == 1) {
85  // algorithm 1: division possible by only shifting
86  constexpr uint32_t shift = r0.shift;
87  return [=](uint64_t dividend) {
88  return dividend >> shift;
89  };
90  } else {
91  constexpr uint32_t L = Math::log2p1(r0.divisor);
92  constexpr uint64_t J = 0xffffffffffffffffull % r0.divisor;
93  constexpr uint128 L64 = uint128(1) << (L + 64);
94  constexpr uint128 k = L64 / (0xffffffffffffffffull - J);
95  constexpr uint128 m_low = L64 / r0.divisor;
96  constexpr uint128 m_high = (L64 + k) / r0.divisor;
97  constexpr auto r2 = reduce2(m_low, m_high, L);
98 
99  if constexpr (high64(r2.m_high) == 0) {
100  // algorithm 2: division possible by multiplication and shift
101  constexpr auto r1 = reduce1(low64(r2.m_high), r0.shift + r2.l);
102 
103  constexpr uint64_t mul = r1.m;
104  constexpr uint32_t shift = r1.s;
105  return [=](uint64_t dividend) {
106  return mla64(dividend, mul, 0) >> shift;
107  };
108  } else {
109  // algorithm 3: division possible by multiplication, addition and shift
110  constexpr uint32_t S = Math::log2p1(r0.divisor) - 1;
111  constexpr uint128 S64 = uint128(1) << (S + 64);
112  constexpr uint128 dq = S64 / r0.divisor;
113  constexpr uint128 dr = S64 % r0.divisor;
114  constexpr uint64_t M = low64(dq) + (low64(dr) > (r0.divisor / 2));
115  constexpr auto r1 = reduce1(M, S + r0.shift);
116 
117  constexpr uint64_t mul = r1.m;
118  constexpr uint32_t shift = r1.s;
119  return [=](uint64_t dividend) {
120  return mla64(dividend, mul, mul) >> shift;
121  };
122  }
123  }
124 }
125 
126 } // namespace DivModByConstPrivate
127 
128 
129 template<uint32_t DIVISOR> struct DivModByConst
130 {
131  [[nodiscard]] constexpr uint32_t div(uint64_t dividend) const
132  {
133  #ifdef __x86_64
134  // on 64-bit CPUs gcc already does this
135  // optimization (and better)
136  uint64_t result = dividend / DIVISOR;
137  #else
138  auto algorithm = DivModByConstPrivate::getAlgorithm<DIVISOR>();
139  uint64_t result = algorithm(dividend);
140  #endif
141  #ifdef DEBUG
142  // we don't even want this overhead in devel builds
143  assert(result == uint32_t(result));
144  #endif
145  return uint32_t(result);
146  }
147 
148  [[nodiscard]] constexpr uint32_t mod(uint64_t dividend) const
149  {
150  #ifdef __x86_64
151  uint64_t result = dividend % DIVISOR;
152  #else
153  uint64_t result = dividend - DIVISOR * div(dividend);
154  #endif
155  #ifdef DEBUG
156  // we don't even want this overhead in devel builds
157  assert(result == uint32_t(result));
158  #endif
159  return uint32_t(result);
160  }
161 };
162 
163 #endif // DIVMODBYCONST
DivModByConstPrivate::Reduce1Result
Definition: DivModByConst.hh:35
DivModByConstPrivate::Reduce2Result::m_high
uint128 m_high
Definition: DivModByConst.hh:49
S
Definition: stl_test.cc:7
DivModByConstPrivate::Reduce1Result::m
uint64_t m
Definition: DivModByConst.hh:36
DivModByConstPrivate::Reduce0Result::divisor
uint32_t divisor
Definition: DivModByConst.hh:23
DivModByConstPrivate::getAlgorithm
constexpr auto getAlgorithm()
Definition: DivModByConst.hh:80
DivModByConstPrivate::Reduce2Result::m_low
uint128 m_low
Definition: DivModByConst.hh:49
DivModByConstPrivate
Utility class to optimize 64-bit divide/module by a 32-bit constant.
Definition: DivModByConst.hh:20
openmsx::L
@ L
Definition: CPUCore.cc:204
DivModByConstPrivate::reduce1
constexpr Reduce1Result reduce1(uint64_t m, uint32_t s)
Definition: DivModByConst.hh:39
DivModByConstPrivate::Reduce0Result
Definition: DivModByConst.hh:22
DivModByConstPrivate::Reduce1Result::s
uint32_t s
Definition: DivModByConst.hh:37
uint128.hh
DivModByConstPrivate::Reduce0Result::shift
uint32_t shift
Definition: DivModByConst.hh:23
DivModByConst::mod
constexpr uint32_t mod(uint64_t dividend) const
Definition: DivModByConst.hh:148
DivModByConstPrivate::Reduce2Result::l
uint32_t l
Definition: DivModByConst.hh:50
DivModByConstPrivate::reduce2
constexpr Reduce2Result reduce2(uint128 m_low, uint128 m_high, uint32_t l)
Definition: DivModByConst.hh:52
DivModByConst::div
constexpr uint32_t div(uint64_t dividend) const
Definition: DivModByConst.hh:131
uint128
Unsigned 128-bit integer type.
Definition: uint128.hh:23
DivModByConstPrivate::reduce0
constexpr Reduce0Result reduce0(uint32_t divisor)
Definition: DivModByConst.hh:25
Math::log2p1
constexpr T log2p1(T x) noexcept
Returns the number of bits needed to store the value 'x', that is: for x==0 : 0 for x!...
Definition: Math.hh:39
DivModByConstPrivate::Reduce2Result
Definition: DivModByConst.hh:48
Math.hh
DivModByConst
Definition: DivModByConst.hh:129