openMSX
Math.hh
Go to the documentation of this file.
1#ifndef MATH_HH
2#define MATH_HH
3
4#include <bit>
5#include <cassert>
6#include <climits>
7#include <cmath>
8#include <concepts>
9#include <cstdint>
10#include <numbers>
11#include <span>
12
13#ifdef _MSC_VER
14#include <intrin.h>
15#pragma intrinsic(_BitScanForward)
16#endif
17
18namespace Math {
19
20inline constexpr double e = std::numbers::e_v <double>;
21inline constexpr double ln2 = std::numbers::ln2_v <double>;
22inline constexpr double ln10 = std::numbers::ln10_v<double>;
23inline constexpr double pi = std::numbers::pi_v <double>;
24
31[[nodiscard]] constexpr auto floodRight(std::unsigned_integral auto x) noexcept
32{
33 x |= x >> 1;
34 x |= x >> 2;
35 x |= x >> 4;
36 x |= x >> ((sizeof(x) >= 2) ? 8 : 0); // Written in a weird way to
37 x |= x >> ((sizeof(x) >= 4) ? 16 : 0); // suppress compiler warnings.
38 x |= x >> ((sizeof(x) >= 8) ? 32 : 0); // Generates equally efficient
39 return x; // code.
40}
41
45template<std::signed_integral T>
46[[nodiscard]] inline int16_t clipToInt16(T x)
47{
48 static_assert((T(-1) >> 1) == T(-1), "right-shift must preserve sign");
49 if (int16_t(x) == x) [[likely]] {
50 return x;
51 } else {
52 constexpr int SHIFT = (sizeof(T) * CHAR_BIT) - 1;
53 return 0x7FFF - (x >> SHIFT);
54 }
55}
56
60[[nodiscard]] inline uint8_t clipIntToByte(int x)
61{
62 static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
63 if (uint8_t(x) == x) [[likely]] {
64 return x;
65 } else {
66 return ~(x >> 31);
67 }
68}
69
74[[nodiscard]] constexpr unsigned reverseNBits(unsigned x, unsigned bits)
75{
76 unsigned ret = 0;
77 while (bits--) {
78 ret = (ret << 1) | (x & 1);
79 x >>= 1;
80 }
81 return ret;
82
83 /* Just for fun I tried the asm version below (the carry-flag trick
84 * cannot be described in plain C). It's correct and generates shorter
85 * code (both less instructions and less bytes). But it doesn't
86 * actually run faster on the machine I tested on, or only a tiny bit
87 * (possibly because of dependency chains and processor stalls???).
88 * However a big disadvantage of this asm version is that when called
89 * with compile-time constant arguments, this version performs exactly
90 * the same, while the version above can be further optimized by the
91 * compiler (constant-propagation, loop unrolling, ...).
92 unsigned ret = 0;
93 if (bits) {
94 asm (
95 "1: shr %[VAL]\n"
96 " adc %[RET],%[RET]\n"
97 " dec %[BITS]\n"
98 " jne 1b\n"
99 : [VAL] "+r" (val)
100 , [BITS] "+r" (bits)
101 , [RET] "+r" (ret)
102 );
103 }
104 return ret;
105 */
106
107 /* Maarten suggested the following approach with O(lg(N)) time
108 * complexity (the version above is O(N)).
109 * - reverse full (32-bit) word: O(lg(N))
110 * - shift right over 32-N bits: O(1)
111 * Note: In some lower end CPU the shift-over-N-bits instruction itself
112 * is O(N), in that case this whole algorithm is O(N)
113 * Note2: Instead of '32' it's also possible to use a lower power of 2,
114 * as long as it's bigger than or equal to N.
115 * This algorithm may or may not be faster than the version above, I
116 * didn't try it yet. Also because this routine is _NOT_ performance
117 * critical _AT_ALL_ currently.
118 */
119}
120
124[[nodiscard]] constexpr uint8_t reverseByte(uint8_t a)
125{
126 // Classical implementation (can be extended to 16 and 32 bits)
127 // a = ((a & 0xF0) >> 4) | ((a & 0x0F) << 4);
128 // a = ((a & 0xCC) >> 2) | ((a & 0x33) << 2);
129 // a = ((a & 0xAA) >> 1) | ((a & 0x55) << 1);
130 // return a;
131
132 // The versions below are specific to reverse a single byte (can't
133 // easily be extended to wider types). Found these tricks on:
134 // http://graphics.stanford.edu/~seander/bithacks.html
135#ifdef __x86_64
136 // on 64-bit systems this is slightly faster
137 return (((a * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL) >> 32;
138#else
139 // on 32-bit systems this is faster
140 return (((a * 0x0802 & 0x22110) | (a * 0x8020 & 0x88440)) * 0x10101) >> 16;
141#endif
142}
143
148[[nodiscard]] inline /*constexpr*/ unsigned findFirstSet(uint32_t x)
149{
150 return x ? std::countr_zero(x) + 1 : 0;
151}
152
153// Cubic Hermite Interpolation:
154// Given 4 points: (-1, y[0]), (0, y[1]), (1, y[2]), (2, y[3])
155// Fit a polynomial: f(x) = a*x^3 + b*x^2 + c*x + d
156// which passes through the given points at x=0 and x=1
157// f(0) = y[0]
158// f(1) = y[1]
159// and which has specific derivatives at x=0 and x=1
160// f'(0) = (y[1] - y[-1]) / 2
161// f'(1) = (y[2] - y[ 0]) / 2
162// Then evaluate this polynomial at the given x-position (x in [0, 1]).
163// For more details see:
164// https://en.wikipedia.org/wiki/Cubic_Hermite_spline
165// https://www.paulinternet.nl/?page=bicubic
166[[nodiscard]] constexpr float cubicHermite(std::span<const float, 4> y, float x)
167{
168 assert(0.0f <= x); assert(x <= 1.0f);
169 float a = -0.5f*y[0] + 1.5f*y[1] - 1.5f*y[2] + 0.5f*y[3];
170 float b = y[0] - 2.5f*y[1] + 2.0f*y[2] - 0.5f*y[3];
171 float c = -0.5f*y[0] + 0.5f*y[2];
172 float d = y[1];
173 float x2 = x * x;
174 float x3 = x * x2;
175 return a*x3 + b*x2 + c*x + d;
176}
177
186};
187constexpr QuotientRemainder div_mod_floor(int dividend, int divisor) {
188 int q = dividend / divisor;
189 int r = dividend % divisor;
190 if ((r != 0) && ((r < 0) != (divisor < 0))) {
191 --q;
192 r += divisor;
193 }
194 return {q, r};
195}
196constexpr int div_floor(int dividend, int divisor) {
197 return div_mod_floor(dividend, divisor).quotient;
198}
199constexpr int mod_floor(int dividend, int divisor) {
200 return div_mod_floor(dividend, divisor).remainder;
201}
202
203} // namespace Math
204
205#endif // MATH_HH
Definition: Math.hh:18
constexpr QuotientRemainder div_mod_floor(int dividend, int divisor)
Definition: Math.hh:187
constexpr unsigned reverseNBits(unsigned x, unsigned bits)
Reverse the lower N bits of a given value.
Definition: Math.hh:74
unsigned findFirstSet(uint32_t x)
Find the least significant bit that is set.
Definition: Math.hh:148
constexpr auto floodRight(std::unsigned_integral auto x) noexcept
Returns the smallest number of the form 2^n-1 that is greater or equal to the given number.
Definition: Math.hh:31
uint8_t clipIntToByte(int x)
Clip x to range [0,255].
Definition: Math.hh:60
constexpr double pi
Definition: Math.hh:23
constexpr int div_floor(int dividend, int divisor)
Definition: Math.hh:196
constexpr float cubicHermite(std::span< const float, 4 > y, float x)
Definition: Math.hh:166
constexpr int mod_floor(int dividend, int divisor)
Definition: Math.hh:199
constexpr double ln2
Definition: Math.hh:21
constexpr double ln10
Definition: Math.hh:22
int16_t clipToInt16(T x)
Clip x to range [-32768,32767].
Definition: Math.hh:46
constexpr uint8_t reverseByte(uint8_t a)
Reverse the bits in a byte.
Definition: Math.hh:124
constexpr double e
Definition: Math.hh:20
Divide one integer by another, rounding towards minus infinity.
Definition: Math.hh:183