openMSX
Math.hh
Go to the documentation of this file.
1 #ifndef MATH_HH
2 #define MATH_HH
3 
4 #include "likely.hh"
5 #include <cassert>
6 #include <cmath>
7 #include <cstdint>
8 
9 #ifdef _MSC_VER
10 #include <intrin.h>
11 #pragma intrinsic(_BitScanForward)
12 #endif
13 
14 // These constants are very common extensions, but not guaranteed to be defined
15 // by <cmath> when compiling in a strict standards compliant mode. Also e.g.
16 // visual studio does not provide them.
17 #ifndef M_E
18 #define M_E 2.7182818284590452354
19 #endif
20 #ifndef M_LN2
21 #define M_LN2 0.69314718055994530942 // log_e 2
22 #endif
23 #ifndef M_LN10
24 #define M_LN10 2.30258509299404568402 // log_e 10
25 #endif
26 #ifndef M_PI
27 #define M_PI 3.14159265358979323846
28 #endif
29 
30 namespace Math {
31 
38 template<typename T>
39 [[nodiscard]] constexpr T log2p1(T x) noexcept
40 {
41  T result = 0;
42  while (x) {
43  ++result;
44  x >>= 1;
45  }
46  return result;
47 }
48 
56 template<typename T>
57 [[nodiscard]] constexpr bool ispow2(T x) noexcept
58 {
59  return x && ((x & (x - 1)) == 0);
60 }
61 
68 template<typename T>
69 [[nodiscard]] constexpr T floodRight(T x) noexcept
70 {
71  x |= x >> 1;
72  x |= x >> 2;
73  x |= x >> 4;
74  x |= x >> ((sizeof(x) >= 2) ? 8 : 0); // Written in a weird way to
75  x |= x >> ((sizeof(x) >= 4) ? 16 : 0); // suppress compiler warnings.
76  x |= x >> ((sizeof(x) >= 8) ? 32 : 0); // Generates equally efficient
77  return x; // code.
78 }
79 
84 template<typename T>
85 [[nodiscard]] constexpr T ceil2(T x) noexcept
86 {
87  // classical implementation:
88  // unsigned res = 1;
89  // while (x > res) res <<= 1;
90  // return res;
91 
92  // optimized version
93  x += (x == 0); // can be removed if argument is never zero
94  return floodRight(x - 1) + 1;
95 }
96 
100 [[nodiscard]] inline int16_t clipIntToShort(int x)
101 {
102  static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
103  return likely(int16_t(x) == x) ? x : (0x7FFF - (x >> 31));
104 }
105 
109 [[nodiscard]] inline uint8_t clipIntToByte(int x)
110 {
111  static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
112  return likely(uint8_t(x) == x) ? x : ~(x >> 31);
113 }
114 
119 [[nodiscard]] inline unsigned reverseNBits(unsigned x, unsigned bits)
120 {
121  unsigned ret = 0;
122  while (bits--) {
123  ret = (ret << 1) | (x & 1);
124  x >>= 1;
125  }
126  return ret;
127 
128  /* Just for fun I tried the asm version below (the carry-flag trick
129  * cannot be described in plain C). It's correct and generates shorter
130  * code (both less instructions and less bytes). But it doesn't
131  * actually run faster on the machine I tested on, or only a tiny bit
132  * (possibly because of dependency chains and processor stalls???).
133  * However a big disadvantage of this asm version is that when called
134  * with compile-time constant arguments, this version performs exactly
135  * the same, while the version above can be further optimized by the
136  * compiler (constant-propagation, loop unrolling, ...).
137  unsigned ret = 0;
138  if (bits) {
139  asm (
140  "1: shr %[VAL]\n"
141  " adc %[RET],%[RET]\n"
142  " dec %[BITS]\n"
143  " jne 1b\n"
144  : [VAL] "+r" (val)
145  , [BITS] "+r" (bits)
146  , [RET] "+r" (ret)
147  );
148  }
149  return ret;
150  */
151 
152  /* Maarten suggested the following approach with O(lg(N)) time
153  * complexity (the version above is O(N)).
154  * - reverse full (32-bit) word: O(lg(N))
155  * - shift right over 32-N bits: O(1)
156  * Note: In some lower end CPU the shift-over-N-bits instruction itself
157  * is O(N), in that case this whole algorithm is O(N)
158  * Note2: Instead of '32' it's also possible to use a lower power of 2,
159  * as long as it's bigger than or equal to N.
160  * This algorithm may or may not be faster than the version above, I
161  * didn't try it yet. Also because this routine is _NOT_ performance
162  * critical _AT_ALL_ currently.
163  */
164 }
165 
169 [[nodiscard]] inline uint8_t reverseByte(uint8_t a)
170 {
171  // Classical implementation (can be extended to 16 and 32 bits)
172  // a = ((a & 0xF0) >> 4) | ((a & 0x0F) << 4);
173  // a = ((a & 0xCC) >> 2) | ((a & 0x33) << 2);
174  // a = ((a & 0xAA) >> 1) | ((a & 0x55) << 1);
175  // return a;
176 
177  // The versions below are specific to reverse a single byte (can't
178  // easily be extended to wider types). Found these tricks on:
179  // http://graphics.stanford.edu/~seander/bithacks.html
180 #ifdef __x86_64
181  // on 64-bit systems this is slightly faster
182  return (((a * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL) >> 32;
183 #else
184  // on 32-bit systems this is faster
185  return (((a * 0x0802 & 0x22110) | (a * 0x8020 & 0x88440)) * 0x10101) >> 16;
186 #endif
187 }
188 
192 [[nodiscard]] inline unsigned countLeadingZeros(unsigned x)
193 {
194 #ifdef __GNUC__
195  // actually this only exists starting from gcc-3.4.x
196  return __builtin_clz(x); // undefined when x==0
197 #else
198  // gives incorrect result for x==0, but that doesn't matter here
199  unsigned lz = 0;
200  if (x <= 0x0000ffff) { lz += 16; x <<= 16; }
201  if (x <= 0x00ffffff) { lz += 8; x <<= 8; }
202  if (x <= 0x0fffffff) { lz += 4; x <<= 4; }
203  lz += (0x55ac >> ((x >> 27) & 0x1e)) & 0x3;
204  return lz;
205 #endif
206 }
207 
212 [[nodiscard]] inline unsigned findFirstSet(unsigned x)
213 {
214 #if defined(__GNUC__)
215  return __builtin_ffs(x);
216 #elif defined(_MSC_VER)
217  unsigned long index;
218  return _BitScanForward(&index, x) ? index + 1 : 0;
219 #else
220  if (x == 0) return 0;
221  int pos = 0;
222  if ((x & 0xffff) == 0) { pos += 16; x >>= 16; }
223  if ((x & 0x00ff) == 0) { pos += 8; x >>= 8; }
224  if ((x & 0x000f) == 0) { pos += 4; x >>= 4; }
225  if ((x & 0x0003) == 0) { pos += 2; x >>= 2; }
226  if ((x & 0x0001) == 0) { pos += 1; }
227  return pos + 1;
228 #endif
229 }
230 
231 // Cubic Hermite Interpolation:
232 // Given 4 points: (-1, y[-1]), (0, y[0]), (1, y[1]), (2, y[2])
233 // Fit a polynomial: f(x) = a*x^3 + b*x^2 + c*x + d
234 // which passes through the given points at x=0 and x=1
235 // f(0) = y[0]
236 // f(1) = y[1]
237 // and which has specific derivatives at x=0 and x=1
238 // f'(0) = (y[1] - y[-1]) / 2
239 // f'(1) = (y[2] - y[ 0]) / 2
240 // Then evaluate this polynomial at the given x-position (x in [0, 1]).
241 // For more details see:
242 // https://en.wikipedia.org/wiki/Cubic_Hermite_spline
243 // https://www.paulinternet.nl/?page=bicubic
244 [[nodiscard]] inline float cubicHermite(const float* y, float x)
245 {
246  assert(0.0f <= x); assert(x <= 1.0f);
247  float a = -0.5f*y[-1] + 1.5f*y[0] - 1.5f*y[1] + 0.5f*y[2];
248  float b = y[-1] - 2.5f*y[0] + 2.0f*y[1] - 0.5f*y[2];
249  float c = -0.5f*y[-1] + 0.5f*y[1];
250  float d = y[0];
251  float x2 = x * x;
252  float x3 = x * x2;
253  return a*x3 + b*x2 + c*x + d;
254 }
255 
256 } // namespace Math
257 
258 #endif // MATH_HH
Math::findFirstSet
unsigned findFirstSet(unsigned x)
Find the least significant bit that is set.
Definition: Math.hh:212
Math::countLeadingZeros
unsigned countLeadingZeros(unsigned x)
Count the number of leading zero-bits in the given word.
Definition: Math.hh:192
Math::cubicHermite
float cubicHermite(const float *y, float x)
Definition: Math.hh:244
Math::floodRight
constexpr T floodRight(T x) noexcept
Returns the smallest number of the form 2^n-1 that is greater or equal to the given number.
Definition: Math.hh:69
Math::ispow2
constexpr bool ispow2(T x) noexcept
Is the given number an integral power of two? That is, does it have exactly one 1-bit in binary repre...
Definition: Math.hh:57
likely.hh
Math::clipIntToByte
uint8_t clipIntToByte(int x)
Clip x to range [0,255].
Definition: Math.hh:109
likely
#define likely(x)
Definition: likely.hh:14
openmsx::x
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:1419
Math::log2p1
constexpr T log2p1(T x) noexcept
Returns the number of bits needed to store the value 'x', that is: for x==0 : 0 for x!...
Definition: Math.hh:39
Math::reverseNBits
unsigned reverseNBits(unsigned x, unsigned bits)
Reverse the lower N bits of a given value.
Definition: Math.hh:119
Math::clipIntToShort
int16_t clipIntToShort(int x)
Clip x to range [-32768,32767].
Definition: Math.hh:100
Math::reverseByte
uint8_t reverseByte(uint8_t a)
Reverse the bits in a byte.
Definition: Math.hh:169
Math::ceil2
constexpr T ceil2(T x) noexcept
Returns the smallest number that is both >=a and a power of two.
Definition: Math.hh:85
Math
Definition: Math.hh:30