openMSX
Math.hh
Go to the documentation of this file.
1 #ifndef MATH_HH
2 #define MATH_HH
3 
4 #include "likely.hh"
5 #include <cmath>
6 #include <cstdint>
7 
8 #ifdef _MSC_VER
9 #include <intrin.h>
10 #pragma intrinsic(_BitScanForward)
11 #endif
12 
13 // These constants are very common extensions, but not guaranteed to be defined
14 // by <cmath> when compiling in a strict standards compliant mode. Also e.g.
15 // visual studio does not provide them.
16 #ifndef M_E
17 #define M_E 2.7182818284590452354
18 #endif
19 #ifndef M_LN2
20 #define M_LN2 0.69314718055994530942 // log_e 2
21 #endif
22 #ifndef M_LN10
23 #define M_LN10 2.30258509299404568402 // log_e 10
24 #endif
25 #ifndef M_PI
26 #define M_PI 3.14159265358979323846
27 #endif
28 
29 namespace Math {
30 
37 template<typename T>
38 constexpr T log2p1(T x) noexcept
39 {
40  T result = 0;
41  while (x) {
42  ++result;
43  x >>= 1;
44  }
45  return result;
46 }
47 
55 template<typename T>
56 constexpr bool ispow2(T x) noexcept
57 {
58  return x && ((x & (x - 1)) == 0);
59 }
60 
67 template<typename T>
68 constexpr T floodRight(T x) noexcept
69 {
70  x |= x >> 1;
71  x |= x >> 2;
72  x |= x >> 4;
73  x |= x >> ((sizeof(x) >= 2) ? 8 : 0); // Written in a weird way to
74  x |= x >> ((sizeof(x) >= 4) ? 16 : 0); // suppress compiler warnings.
75  x |= x >> ((sizeof(x) >= 8) ? 32 : 0); // Generates equally efficient
76  return x; // code.
77 }
78 
83 template<typename T>
84 constexpr T ceil2(T x) noexcept
85 {
86  // classical implementation:
87  // unsigned res = 1;
88  // while (x > res) res <<= 1;
89  // return res;
90 
91  // optimized version
92  x += (x == 0); // can be removed if argument is never zero
93  return floodRight(x - 1) + 1;
94 }
95 
100 template <int LO, int HI>
101 inline int clip(int x)
102 {
103  static_assert(LO <= HI, "invalid clip range");
104  return unsigned(x - LO) <= unsigned(HI - LO) ? x : (x < HI ? LO : HI);
105 }
106 
110 inline int16_t clipIntToShort(int x)
111 {
112  static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
113  return likely(int16_t(x) == x) ? x : (0x7FFF - (x >> 31));
114 }
115 
119 inline uint8_t clipIntToByte(int x)
120 {
121  static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
122  return likely(uint8_t(x) == x) ? x : ~(x >> 31);
123 }
124 
134 inline unsigned gcd(unsigned a, unsigned b)
135 {
136  unsigned k = 0;
137  while (((a & 1) == 0) && ((b & 1) == 0)) {
138  a >>= 1; b >>= 1; ++k;
139  }
140 
141  // either a or b (or both) is odd
142  while ((a & 1) == 0) a >>= 1;
143  while ((b & 1) == 0) b >>= 1;
144 
145  // both a and b odd
146  while (a != b) {
147  if (a >= b) {
148  a -= b;
149  do { a >>= 1; } while ((a & 1) == 0);
150  } else {
151  b -= a;
152  do { b >>= 1; } while ((b & 1) == 0);
153  }
154  }
155  return b << k;
156 }
157 
162 inline unsigned reverseNBits(unsigned x, unsigned bits)
163 {
164  unsigned ret = 0;
165  while (bits--) {
166  ret = (ret << 1) | (x & 1);
167  x >>= 1;
168  }
169  return ret;
170 
171  /* Just for fun I tried the asm version below (the carry-flag trick
172  * cannot be described in plain C). It's correct and generates shorter
173  * code (both less instructions and less bytes). But it doesn't
174  * actually run faster on the machine I tested on, or only a tiny bit
175  * (possibly because of dependency chains and processor stalls???).
176  * However a big disadvantage of this asm version is that when called
177  * with compile-time constant arguments, this version performs exactly
178  * the same, while the version above can be further optimized by the
179  * compiler (constant-propagation, loop unrolling, ...).
180  unsigned ret = 0;
181  if (bits) {
182  asm (
183  "1: shr %[VAL]\n"
184  " adc %[RET],%[RET]\n"
185  " dec %[BITS]\n"
186  " jne 1b\n"
187  : [VAL] "+r" (val)
188  , [BITS] "+r" (bits)
189  , [RET] "+r" (ret)
190  );
191  }
192  return ret;
193  */
194 
195  /* Maarten suggested the following approach with O(lg(N)) time
196  * complexity (the version above is O(N)).
197  * - reverse full (32-bit) word: O(lg(N))
198  * - shift right over 32-N bits: O(1)
199  * Note: In some lower end CPU the shift-over-N-bits instruction itself
200  * is O(N), in that case this whole algorithm is O(N)
201  * Note2: Instead of '32' it's also possible to use a lower power of 2,
202  * as long as it's bigger than or equal to N.
203  * This algorithm may or may not be faster than the version above, I
204  * didn't try it yet. Also because this routine is _NOT_ performance
205  * critical _AT_ALL_ currently.
206  */
207 }
208 
212 inline uint8_t reverseByte(uint8_t a)
213 {
214  // Classical implementation (can be extended to 16 and 32 bits)
215  // a = ((a & 0xF0) >> 4) | ((a & 0x0F) << 4);
216  // a = ((a & 0xCC) >> 2) | ((a & 0x33) << 2);
217  // a = ((a & 0xAA) >> 1) | ((a & 0x55) << 1);
218  // return a;
219 
220  // The versions below are specific to reverse a single byte (can't
221  // easily be extended to wider types). Found these tricks on:
222  // http://graphics.stanford.edu/~seander/bithacks.html
223 #ifdef __x86_64
224  // on 64-bit systems this is slightly faster
225  return (((a * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL) >> 32;
226 #else
227  // on 32-bit systems this is faster
228  return (((a * 0x0802 & 0x22110) | (a * 0x8020 & 0x88440)) * 0x10101) >> 16;
229 #endif
230 }
231 
235 inline unsigned countLeadingZeros(unsigned x)
236 {
237 #ifdef __GNUC__
238  // actually this only exists starting from gcc-3.4.x
239  return __builtin_clz(x); // undefined when x==0
240 #else
241  // gives incorrect result for x==0, but that doesn't matter here
242  unsigned lz = 0;
243  if (x <= 0x0000ffff) { lz += 16; x <<= 16; }
244  if (x <= 0x00ffffff) { lz += 8; x <<= 8; }
245  if (x <= 0x0fffffff) { lz += 4; x <<= 4; }
246  lz += (0x55ac >> ((x >> 27) & 0x1e)) & 0x3;
247  return lz;
248 #endif
249 }
250 
255 inline unsigned findFirstSet(unsigned x)
256 {
257 #if defined(__GNUC__)
258  return __builtin_ffs(x);
259 #elif defined(_MSC_VER)
260  unsigned long index;
261  return _BitScanForward(&index, x) ? index + 1 : 0;
262 #else
263  if (x == 0) return 0;
264  int pos = 0;
265  if ((x & 0xffff) == 0) { pos += 16; x >>= 16; }
266  if ((x & 0x00ff) == 0) { pos += 8; x >>= 8; }
267  if ((x & 0x000f) == 0) { pos += 4; x >>= 4; }
268  if ((x & 0x0003) == 0) { pos += 2; x >>= 2; }
269  if ((x & 0x0001) == 0) { pos += 1; }
270  return pos + 1;
271 #endif
272 }
273 
274 } // namespace Math
275 
276 #endif // MATH_HH
int clip(int x)
Clips x to the range [LO,HI].
Definition: Math.hh:101
constexpr T floodRight(T x) noexcept
Returns the smallest number of the form 2^n-1 that is greater or equal to the given number...
Definition: Math.hh:68
unsigned reverseNBits(unsigned x, unsigned bits)
Reverse the lower N bits of a given value.
Definition: Math.hh:162
int16_t clipIntToShort(int x)
Clip x to range [-32768,32767].
Definition: Math.hh:110
unsigned gcd(unsigned a, unsigned b)
Calculate greatest common divider of two strictly positive integers.
Definition: Math.hh:134
unsigned findFirstSet(unsigned x)
Find the least significant bit that is set.
Definition: Math.hh:255
constexpr T log2p1(T x) noexcept
Returns the number of bits needed to store the value &#39;x&#39;, that is: for x==0 : 0 for x!=0 : 1 + floor(...
Definition: Math.hh:38
Definition: Math.hh:29
#define likely(x)
Definition: likely.hh:14
uint8_t clipIntToByte(int x)
Clip x to range [0,255].
Definition: Math.hh:119
constexpr T ceil2(T x) noexcept
Returns the smallest number that is both >=a and a power of two.
Definition: Math.hh:84
uint8_t reverseByte(uint8_t a)
Reverse the bits in a byte.
Definition: Math.hh:212
constexpr bool ispow2(T x) noexcept
Is the given number an integral power of two? That is, does it have exactly one 1-bit in binary repre...
Definition: Math.hh:56
unsigned countLeadingZeros(unsigned x)
Count the number of leading zero-bits in the given word.
Definition: Math.hh:235