openMSX
FFTReal.hh
Go to the documentation of this file.
1#ifndef FFTREAL_HH
2#define FFTREAL_HH
3
4// This code is based on:
5// fft-real
6// https://github.com/cyrilcode/fft-real
7// FFTReal is distributed under the terms of the Do What The Fuck You Want To Public License.
8//
9// This version is stripped down and specialized for our use case.
10
11#include "cstd.hh"
12#include "narrow.hh"
13
14#include <array>
15#include <cmath>
16#include <cstdint>
17#include <numbers>
18#include <span>
19
20// Calculate the FFT for real input (as opposed to complex numbers input).
21// The result however are still complex numbers, and arranged as follows:
22// output[0] = Real(bin 0)
23// output[1] = Real(bin 1)
24// ...
25// output[len/2 - 1] = Real(bin len/2 -1)
26// output[len/2 ] = Real(bin len/2)
27// output[len/2 + 1] = Imag(bin 1)
28// ...
29// output[len - 2] = Imag(bin len/2 - 2)
30// output[len - 1] = Real(bin len/2 - 1)
31// Note: There are two more real-parts than imaginary-parts. For the lowest bin
32// (the DC-offset) and for the highest bin (at Nyquist frequency) the output is
33// a pure real value (the imaginary part is zero).
34template<unsigned FFT_LEN_L2>
36{
37public:
38 static_assert(FFT_LEN_L2 >= 3); // at least 8-points
39 static_assert(FFT_LEN_L2 <= 16); // at most 65536-points
40 static constexpr unsigned FFT_LEN = 1 << FFT_LEN_L2;
41
42 static void execute(std::span<const float, FFT_LEN> input,
43 std::span<float, FFT_LEN> output,
44 std::span<float, FFT_LEN> tmpBuf)
45 {
46 pass<FFT_LEN_L2 - 1>(output, tmpBuf, input);
47 }
48
49private:
50 template<int PASS>
51 static void pass(std::span<float, FFT_LEN> dst,
52 std::span<float, FFT_LEN> src,
53 std::span<const float, FFT_LEN> input)
54 {
55 if constexpr (PASS == 1) {
56 // Do 1st and 2nd pass at once.
57 static constexpr auto len4 = FFT_LEN >> 2;
58 for (unsigned idx = 0; idx < FFT_LEN; idx += 4) {
59 const unsigned ri_0 = bitRevBuf[idx >> 2];
60 const unsigned ri_1 = ri_0 + 2 * len4; // bitRevBuf[idx + 1];
61 const unsigned ri_2 = ri_0 + 1 * len4; // bitRevBuf[idx + 2];
62 const unsigned ri_3 = ri_0 + 3 * len4; // bitRevBuf[idx + 3];
63
64 dst[idx + 1] = input[ri_0] - input[ri_1];
65 dst[idx + 3] = input[ri_2] - input[ri_3];
66
67 const float sf_0 = input[ri_0] + input[ri_1];
68 const float sf_2 = input[ri_2] + input[ri_3];
69
70 dst[idx + 0] = sf_0 + sf_2;
71 dst[idx + 2] = sf_0 - sf_2;
72 }
73
74 } else if constexpr (PASS == 2) {
75 // Start with 1st and 2nd passes (swap source and destination buffers).
76 pass<1>(src, dst, input);
77
78 // 3rd pass
79 static constexpr float sqrt2_2 = std::numbers::sqrt2_v<float> * 0.5f;
80 for (unsigned idx = 0; idx < FFT_LEN; idx += 8) {
81 dst[idx + 0] = src[idx] + src[idx + 4];
82 dst[idx + 4] = src[idx] - src[idx + 4];
83 dst[idx + 2] = src[idx + 2];
84 dst[idx + 6] = src[idx + 6];
85
86 float v1 = (src[idx + 5] - src[idx + 7]) * sqrt2_2;
87 dst[idx + 1] = src[idx + 1] + v1;
88 dst[idx + 3] = src[idx + 1] - v1;
89
90 float v2 = (src[idx + 5] + src[idx + 7]) * sqrt2_2;
91 dst[idx + 5] = v2 + src[idx + 3];
92 dst[idx + 7] = v2 - src[idx + 3];
93 }
94
95 } else {
96 // First do the previous passes (swap source and destination buffers).
97 pass<PASS - 1>(src, dst, input);
98
99 static constexpr unsigned dist = 1 << (PASS - 1);
100 static constexpr unsigned c1_r = 0;
101 static constexpr unsigned c1_i = dist * 1;
102 static constexpr unsigned c2_r = dist * 2;
103 static constexpr unsigned c2_i = dist * 3;
104 static constexpr unsigned cend = dist * 4;
105 static constexpr unsigned table_step = COS_ARR_SIZE >> (PASS - 1);
106
107 for (unsigned idx = 0; idx < FFT_LEN; idx += cend) {
108 std::span<const float> sf = src.subspan(idx);
109 std::span< float> df = dst.subspan(idx);
110
111 // Extreme coefficients are always real
112 df[c1_r] = sf[c1_r] + sf[c2_r];
113 df[c2_r] = sf[c1_r] - sf[c2_r];
114 df[c1_i] = sf[c1_i];
115 df[c2_i] = sf[c2_i];
116
117 // Others are conjugate complex numbers
118 for (unsigned i = 1; i < dist; ++ i) {
119 const float c = cosBuf[ i * table_step];
120 const float s = cosBuf[(dist - i) * table_step];
121
122 const float sf_r_i = sf[c1_r + i];
123 const float sf_i_i = sf[c1_i + i];
124
125 const float v1 = sf[c2_r + i] * c - sf[c2_i + i] * s;
126 df[c1_r + i] = sf_r_i + v1;
127 df[c2_r - i] = sf_r_i - v1;
128
129 const float v2 = sf[c2_r + i] * s + sf[c2_i + i] * c;
130 df[c2_r + i] = v2 + sf_i_i;
131 df[cend - i] = v2 - sf_i_i;
132 }
133 }
134 }
135 }
136
137 static constexpr auto bitRevBuf = []{
138 constexpr int BR_ARR_SIZE = FFT_LEN / 4;
139 std::array<uint16_t, BR_ARR_SIZE> result = {};
140 for (unsigned cnt = 0; cnt < result.size(); ++cnt) {
141 unsigned index = cnt << 2;
142 unsigned res = 0;
143 for (int bit_cnt = FFT_LEN_L2; bit_cnt > 0; --bit_cnt) {
144 res <<= 1;
145 res += (index & 1);
146 index >>= 1;
147 }
148 result[cnt] = narrow<uint16_t>(res);
149 }
150 return result;
151 }();
152
153 static constexpr int COS_ARR_SIZE = FFT_LEN / 4;
154 static constexpr auto cosBuf = []{
155 std::array<float, COS_ARR_SIZE> result = {};
156 const double mul = (0.5 * std::numbers::pi) / COS_ARR_SIZE;
157 for (unsigned i = 0; i < result.size(); ++i) {
158 result[i] = float(cstd::cos<4>(i * mul));
159 }
160 return result;
161 }();
162};
163
164#endif
static constexpr unsigned FFT_LEN
Definition FFTReal.hh:40
static void execute(std::span< const float, FFT_LEN > input, std::span< float, FFT_LEN > output, std::span< float, FFT_LEN > tmpBuf)
Definition FFTReal.hh:42