openMSX
FFTReal.hh
Go to the documentation of this file.
1#ifndef FFTREAL_HH
2#define FFTREAL_HH
3
4// This code is based on:
5// fft-real
6// https://github.com/cyrilcode/fft-real
7// FFTReal is distributed under the terms of the Do What The Fuck You Want To Public License.
8//
9// This version is stripped down and specialized for our use case.
10
11#include "cstd.hh"
12#include "narrow.hh"
13#include "xrange.hh"
14
15#include <array>
16#include <cmath>
17#include <cstdint>
18#include <numbers>
19#include <span>
20
21// Calculate the FFT for real input (as opposed to complex numbers input).
22// The result however are still complex numbers, and arranged as follows:
23// output[0] = Real(bin 0)
24// output[1] = Real(bin 1)
25// ...
26// output[len/2 - 1] = Real(bin len/2 -1)
27// output[len/2 ] = Real(bin len/2)
28// output[len/2 + 1] = Imag(bin 1)
29// ...
30// output[len - 2] = Imag(bin len/2 - 2)
31// output[len - 1] = Real(bin len/2 - 1)
32// Note: There are two more real-parts than imaginary-parts. For the lowest bin
33// (the DC-offset) and for the highest bin (at Nyquist frequency) the output is
34// a pure real value (the imaginary part is zero).
35template<unsigned FFT_LEN_L2>
37{
38public:
39 static_assert(FFT_LEN_L2 >= 3); // at least 8-points
40 static_assert(FFT_LEN_L2 <= 16); // at most 65536-points
41 static constexpr unsigned FFT_LEN = 1 << FFT_LEN_L2;
42
43 static void execute(std::span<const float, FFT_LEN> input,
44 std::span<float, FFT_LEN> output,
45 std::span<float, FFT_LEN> tmpBuf)
46 {
47 pass<FFT_LEN_L2 - 1>(output, tmpBuf, input);
48 }
49
50private:
51 template<int PASS>
52 static void pass(std::span<float, FFT_LEN> dst,
53 std::span<float, FFT_LEN> src,
54 std::span<const float, FFT_LEN> input)
55 {
56 if constexpr (PASS == 1) {
57 // Do 1st and 2nd pass at once.
58 static constexpr auto len4 = FFT_LEN >> 2;
59 for (unsigned idx = 0; idx < FFT_LEN; idx += 4) {
60 const unsigned ri_0 = bitRevBuf[idx >> 2];
61 const unsigned ri_1 = ri_0 + 2 * len4; // bitRevBuf[idx + 1];
62 const unsigned ri_2 = ri_0 + 1 * len4; // bitRevBuf[idx + 2];
63 const unsigned ri_3 = ri_0 + 3 * len4; // bitRevBuf[idx + 3];
64
65 dst[idx + 1] = input[ri_0] - input[ri_1];
66 dst[idx + 3] = input[ri_2] - input[ri_3];
67
68 const float sf_0 = input[ri_0] + input[ri_1];
69 const float sf_2 = input[ri_2] + input[ri_3];
70
71 dst[idx + 0] = sf_0 + sf_2;
72 dst[idx + 2] = sf_0 - sf_2;
73 }
74
75 } else if constexpr (PASS == 2) {
76 // Start with 1st and 2nd passes (swap source and destination buffers).
77 pass<1>(src, dst, input);
78
79 // 3rd pass
80 static constexpr float sqrt2_2 = std::numbers::sqrt2_v<float> * 0.5f;
81 for (unsigned idx = 0; idx < FFT_LEN; idx += 8) {
82 dst[idx + 0] = src[idx] + src[idx + 4];
83 dst[idx + 4] = src[idx] - src[idx + 4];
84 dst[idx + 2] = src[idx + 2];
85 dst[idx + 6] = src[idx + 6];
86
87 float v1 = (src[idx + 5] - src[idx + 7]) * sqrt2_2;
88 dst[idx + 1] = src[idx + 1] + v1;
89 dst[idx + 3] = src[idx + 1] - v1;
90
91 float v2 = (src[idx + 5] + src[idx + 7]) * sqrt2_2;
92 dst[idx + 5] = v2 + src[idx + 3];
93 dst[idx + 7] = v2 - src[idx + 3];
94 }
95
96 } else {
97 // First do the previous passes (swap source and destination buffers).
98 pass<PASS - 1>(src, dst, input);
99
100 static constexpr unsigned dist = 1 << (PASS - 1);
101 static constexpr unsigned c1_r = 0;
102 static constexpr unsigned c1_i = dist * 1;
103 static constexpr unsigned c2_r = dist * 2;
104 static constexpr unsigned c2_i = dist * 3;
105 static constexpr unsigned cend = dist * 4;
106 static constexpr unsigned table_step = COS_ARR_SIZE >> (PASS - 1);
107
108 for (unsigned idx = 0; idx < FFT_LEN; idx += cend) {
109 std::span<const float> sf = src.subspan(idx);
110 std::span< float> df = dst.subspan(idx);
111
112 // Extreme coefficients are always real
113 df[c1_r] = sf[c1_r] + sf[c2_r];
114 df[c2_r] = sf[c1_r] - sf[c2_r];
115 df[c1_i] = sf[c1_i];
116 df[c2_i] = sf[c2_i];
117
118 // Others are conjugate complex numbers
119 for (unsigned i = 1; i < dist; ++ i) {
120 const float c = cosBuf[ i * table_step];
121 const float s = cosBuf[(dist - i) * table_step];
122
123 const float sf_r_i = sf[c1_r + i];
124 const float sf_i_i = sf[c1_i + i];
125
126 const float v1 = sf[c2_r + i] * c - sf[c2_i + i] * s;
127 df[c1_r + i] = sf_r_i + v1;
128 df[c2_r - i] = sf_r_i - v1;
129
130 const float v2 = sf[c2_r + i] * s + sf[c2_i + i] * c;
131 df[c2_r + i] = v2 + sf_i_i;
132 df[cend - i] = v2 - sf_i_i;
133 }
134 }
135 }
136 }
137
138 static constexpr auto bitRevBuf = []{
139 constexpr int BR_ARR_SIZE = FFT_LEN / 4;
140 std::array<uint16_t, BR_ARR_SIZE> result = {};
141 for (auto cnt : xrange(narrow<unsigned>(result.size()))) {
142 unsigned index = cnt << 2;
143 unsigned res = 0;
144 for (int bit_cnt = FFT_LEN_L2; bit_cnt > 0; --bit_cnt) {
145 res <<= 1;
146 res += (index & 1);
147 index >>= 1;
148 }
149 result[cnt] = narrow<uint16_t>(res);
150 }
151 return result;
152 }();
153
154 static constexpr int COS_ARR_SIZE = FFT_LEN / 4;
155 static constexpr auto cosBuf = []{
156 std::array<float, COS_ARR_SIZE> result = {};
157 const double mul = (0.5 * std::numbers::pi) / COS_ARR_SIZE;
158 for (auto i : xrange(narrow<unsigned>(result.size()))) {
159 result[i] = float(cstd::cos<4>(i * mul));
160 }
161 return result;
162 }();
163};
164
165#endif
static constexpr unsigned FFT_LEN
Definition FFTReal.hh:41
static void execute(std::span< const float, FFT_LEN > input, std::span< float, FFT_LEN > output, std::span< float, FFT_LEN > tmpBuf)
Definition FFTReal.hh:43
size_t size(std::string_view utf8)
constexpr To narrow(From from) noexcept
Definition narrow.hh:37
constexpr auto xrange(T e)
Definition xrange.hh:132