openMSX
Math.hh
Go to the documentation of this file.
1 #ifndef MATH_HH
2 #define MATH_HH
3 
4 #include "likely.hh"
5 #include <cassert>
6 #include <cmath>
7 #include <cstdint>
8 
9 #ifdef _MSC_VER
10 #include <intrin.h>
11 #pragma intrinsic(_BitScanForward)
12 #endif
13 
14 // These constants are very common extensions, but not guaranteed to be defined
15 // by <cmath> when compiling in a strict standards compliant mode. Also e.g.
16 // visual studio does not provide them.
17 #ifndef M_E
18 #define M_E 2.7182818284590452354
19 #endif
20 #ifndef M_LN2
21 #define M_LN2 0.69314718055994530942 // log_e 2
22 #endif
23 #ifndef M_LN10
24 #define M_LN10 2.30258509299404568402 // log_e 10
25 #endif
26 #ifndef M_PI
27 #define M_PI 3.14159265358979323846
28 #endif
29 
30 namespace Math {
31 
38 template<typename T>
39 [[nodiscard]] constexpr T log2p1(T x) noexcept
40 {
41  T result = 0;
42  while (x) {
43  ++result;
44  x >>= 1;
45  }
46  return result;
47 }
48 
56 template<typename T>
57 [[nodiscard]] constexpr bool ispow2(T x) noexcept
58 {
59  return x && ((x & (x - 1)) == 0);
60 }
61 
68 template<typename T>
69 [[nodiscard]] constexpr T floodRight(T x) noexcept
70 {
71  x |= x >> 1;
72  x |= x >> 2;
73  x |= x >> 4;
74  x |= x >> ((sizeof(x) >= 2) ? 8 : 0); // Written in a weird way to
75  x |= x >> ((sizeof(x) >= 4) ? 16 : 0); // suppress compiler warnings.
76  x |= x >> ((sizeof(x) >= 8) ? 32 : 0); // Generates equally efficient
77  return x; // code.
78 }
79 
84 template<typename T>
85 [[nodiscard]] constexpr T ceil2(T x) noexcept
86 {
87  // classical implementation:
88  // unsigned res = 1;
89  // while (x > res) res <<= 1;
90  // return res;
91 
92  // optimized version
93  x += (x == 0); // can be removed if argument is never zero
94  return floodRight(x - 1) + 1;
95 }
96 
101 template <int LO, int HI>
102 [[nodiscard]] inline int clip(int x)
103 {
104  static_assert(LO <= HI, "invalid clip range");
105  return unsigned(x - LO) <= unsigned(HI - LO) ? x : (x < HI ? LO : HI);
106 }
107 
111 [[nodiscard]] inline int16_t clipIntToShort(int x)
112 {
113  static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
114  return likely(int16_t(x) == x) ? x : (0x7FFF - (x >> 31));
115 }
116 
120 [[nodiscard]] inline uint8_t clipIntToByte(int x)
121 {
122  static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
123  return likely(uint8_t(x) == x) ? x : ~(x >> 31);
124 }
125 
135 [[nodiscard]] inline unsigned gcd(unsigned a, unsigned b)
136 {
137  unsigned k = 0;
138  while (((a & 1) == 0) && ((b & 1) == 0)) {
139  a >>= 1; b >>= 1; ++k;
140  }
141 
142  // either a or b (or both) is odd
143  while ((a & 1) == 0) a >>= 1;
144  while ((b & 1) == 0) b >>= 1;
145 
146  // both a and b odd
147  while (a != b) {
148  if (a >= b) {
149  a -= b;
150  do { a >>= 1; } while ((a & 1) == 0);
151  } else {
152  b -= a;
153  do { b >>= 1; } while ((b & 1) == 0);
154  }
155  }
156  return b << k;
157 }
158 
163 [[nodiscard]] inline unsigned reverseNBits(unsigned x, unsigned bits)
164 {
165  unsigned ret = 0;
166  while (bits--) {
167  ret = (ret << 1) | (x & 1);
168  x >>= 1;
169  }
170  return ret;
171 
172  /* Just for fun I tried the asm version below (the carry-flag trick
173  * cannot be described in plain C). It's correct and generates shorter
174  * code (both less instructions and less bytes). But it doesn't
175  * actually run faster on the machine I tested on, or only a tiny bit
176  * (possibly because of dependency chains and processor stalls???).
177  * However a big disadvantage of this asm version is that when called
178  * with compile-time constant arguments, this version performs exactly
179  * the same, while the version above can be further optimized by the
180  * compiler (constant-propagation, loop unrolling, ...).
181  unsigned ret = 0;
182  if (bits) {
183  asm (
184  "1: shr %[VAL]\n"
185  " adc %[RET],%[RET]\n"
186  " dec %[BITS]\n"
187  " jne 1b\n"
188  : [VAL] "+r" (val)
189  , [BITS] "+r" (bits)
190  , [RET] "+r" (ret)
191  );
192  }
193  return ret;
194  */
195 
196  /* Maarten suggested the following approach with O(lg(N)) time
197  * complexity (the version above is O(N)).
198  * - reverse full (32-bit) word: O(lg(N))
199  * - shift right over 32-N bits: O(1)
200  * Note: In some lower end CPU the shift-over-N-bits instruction itself
201  * is O(N), in that case this whole algorithm is O(N)
202  * Note2: Instead of '32' it's also possible to use a lower power of 2,
203  * as long as it's bigger than or equal to N.
204  * This algorithm may or may not be faster than the version above, I
205  * didn't try it yet. Also because this routine is _NOT_ performance
206  * critical _AT_ALL_ currently.
207  */
208 }
209 
213 [[nodiscard]] inline uint8_t reverseByte(uint8_t a)
214 {
215  // Classical implementation (can be extended to 16 and 32 bits)
216  // a = ((a & 0xF0) >> 4) | ((a & 0x0F) << 4);
217  // a = ((a & 0xCC) >> 2) | ((a & 0x33) << 2);
218  // a = ((a & 0xAA) >> 1) | ((a & 0x55) << 1);
219  // return a;
220 
221  // The versions below are specific to reverse a single byte (can't
222  // easily be extended to wider types). Found these tricks on:
223  // http://graphics.stanford.edu/~seander/bithacks.html
224 #ifdef __x86_64
225  // on 64-bit systems this is slightly faster
226  return (((a * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL) >> 32;
227 #else
228  // on 32-bit systems this is faster
229  return (((a * 0x0802 & 0x22110) | (a * 0x8020 & 0x88440)) * 0x10101) >> 16;
230 #endif
231 }
232 
236 [[nodiscard]] inline unsigned countLeadingZeros(unsigned x)
237 {
238 #ifdef __GNUC__
239  // actually this only exists starting from gcc-3.4.x
240  return __builtin_clz(x); // undefined when x==0
241 #else
242  // gives incorrect result for x==0, but that doesn't matter here
243  unsigned lz = 0;
244  if (x <= 0x0000ffff) { lz += 16; x <<= 16; }
245  if (x <= 0x00ffffff) { lz += 8; x <<= 8; }
246  if (x <= 0x0fffffff) { lz += 4; x <<= 4; }
247  lz += (0x55ac >> ((x >> 27) & 0x1e)) & 0x3;
248  return lz;
249 #endif
250 }
251 
256 [[nodiscard]] inline unsigned findFirstSet(unsigned x)
257 {
258 #if defined(__GNUC__)
259  return __builtin_ffs(x);
260 #elif defined(_MSC_VER)
261  unsigned long index;
262  return _BitScanForward(&index, x) ? index + 1 : 0;
263 #else
264  if (x == 0) return 0;
265  int pos = 0;
266  if ((x & 0xffff) == 0) { pos += 16; x >>= 16; }
267  if ((x & 0x00ff) == 0) { pos += 8; x >>= 8; }
268  if ((x & 0x000f) == 0) { pos += 4; x >>= 4; }
269  if ((x & 0x0003) == 0) { pos += 2; x >>= 2; }
270  if ((x & 0x0001) == 0) { pos += 1; }
271  return pos + 1;
272 #endif
273 }
274 
275 // Cubic Hermite Interpolation:
276 // Given 4 points: (-1, y[-1]), (0, y[0]), (1, y[1]), (2, y[2])
277 // Fit a polynomial: f(x) = a*x^3 + b*x^2 + c*x + d
278 // which passes through the given points at x=0 and x=1
279 // f(0) = y[0]
280 // f(1) = y[1]
281 // and which has specific derivatives at x=0 and x=1
282 // f'(0) = (y[1] - y[-1]) / 2
283 // f'(1) = (y[2] - y[ 0]) / 2
284 // Then evaluate this polynomial at the given x-position (x in [0, 1]).
285 // For more details see:
286 // https://en.wikipedia.org/wiki/Cubic_Hermite_spline
287 // https://www.paulinternet.nl/?page=bicubic
288 [[nodiscard]] inline float cubicHermite(const float* y, float x)
289 {
290  assert(0.0f <= x); assert(x <= 1.0f);
291  float a = -0.5f*y[-1] + 1.5f*y[0] - 1.5f*y[1] + 0.5f*y[2];
292  float b = y[-1] - 2.5f*y[0] + 2.0f*y[1] - 0.5f*y[2];
293  float c = -0.5f*y[-1] + 0.5f*y[1];
294  float d = y[0];
295  float x2 = x * x;
296  float x3 = x * x2;
297  return a*x3 + b*x2 + c*x + d;
298 }
299 
300 } // namespace Math
301 
302 #endif // MATH_HH
Math::findFirstSet
unsigned findFirstSet(unsigned x)
Find the least significant bit that is set.
Definition: Math.hh:256
Math::countLeadingZeros
unsigned countLeadingZeros(unsigned x)
Count the number of leading zero-bits in the given word.
Definition: Math.hh:236
Math::cubicHermite
float cubicHermite(const float *y, float x)
Definition: Math.hh:288
Math::floodRight
constexpr T floodRight(T x) noexcept
Returns the smallest number of the form 2^n-1 that is greater or equal to the given number.
Definition: Math.hh:69
Math::ispow2
constexpr bool ispow2(T x) noexcept
Is the given number an integral power of two? That is, does it have exactly one 1-bit in binary repre...
Definition: Math.hh:57
likely.hh
Math::clipIntToByte
uint8_t clipIntToByte(int x)
Clip x to range [0,255].
Definition: Math.hh:120
likely
#define likely(x)
Definition: likely.hh:14
openmsx::x
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:1419
Math::log2p1
constexpr T log2p1(T x) noexcept
Returns the number of bits needed to store the value 'x', that is: for x==0 : 0 for x!...
Definition: Math.hh:39
Math::reverseNBits
unsigned reverseNBits(unsigned x, unsigned bits)
Reverse the lower N bits of a given value.
Definition: Math.hh:163
Math::clipIntToShort
int16_t clipIntToShort(int x)
Clip x to range [-32768,32767].
Definition: Math.hh:111
Math::gcd
unsigned gcd(unsigned a, unsigned b)
Calculate greatest common divider of two strictly positive integers.
Definition: Math.hh:135
Math::clip
int clip(int x)
Clips x to the range [LO,HI].
Definition: Math.hh:102
Math::reverseByte
uint8_t reverseByte(uint8_t a)
Reverse the bits in a byte.
Definition: Math.hh:213
Math::ceil2
constexpr T ceil2(T x) noexcept
Returns the smallest number that is both >=a and a power of two.
Definition: Math.hh:85
Math
Definition: Math.hh:30