// Supported sizes: the exponent FFT_LEN_L2 must lie in the range [3, 16].
38 static_assert(FFT_LEN_L2 >= 3);
39 static_assert(FFT_LEN_L2 <= 16);
// Number of elements per transform: 2 to the power FFT_LEN_L2 (8 .. 65536).
40 static constexpr unsigned FFT_LEN = 1 << FFT_LEN_L2;
// Entry point of the transform (presumably a real-input FFT, judging by the
// FFT_LEN naming -- confirm against the class documentation).
//
// input  : FFT_LEN elements to transform; only read.
// output : FFT_LEN elements; receives the result of the last stage.
// tmpBuf : FFT_LEN elements of scratch storage. It is handed to the last
//          stage as its source buffer, and the stages swap the two buffers
//          each time they recurse.
42 static void execute(std::span<const float, FFT_LEN> input,
43 std::span<float, FFT_LEN> output,
44 std::span<float, FFT_LEN> tmpBuf)
// Start with the highest stage number; it invokes the lower stages itself
// before doing its own work.
46 pass<FFT_LEN_L2 - 1>(output, tmpBuf, input);
// One stage of the transform; PASS selects the stage at compile time.
//
// dst   : buffer this stage writes its result to.
// src   : buffer this stage reads from. Every stage above the first begins by
//         invoking the stage below it with dst and src swapped, so the two
//         buffers alternate roles from stage to stage.
// input : the original data; in the code below it is only read by stage 1 and
//         otherwise just passed down.
51 static void pass(std::span<float, FFT_LEN> dst,
52 std::span<float, FFT_LEN> src,
53 std::span<const float, FFT_LEN> input)
55 if constexpr (PASS == 1) {
// Stage 1 works directly on `input` and needs only additions and
// subtractions.
// NOTE(review): looks like the first two butterfly stages fused into one --
// confirm.
// Quarter of the transform length: spacing between the gathered inputs.
57 static constexpr auto len4 =
FFT_LEN >> 2;
// Each iteration produces four consecutive elements of dst.
58 for (
unsigned idx = 0; idx <
FFT_LEN; idx += 4) {
// The base read position comes from the table (one entry per group of four);
// the other three positions are offset by multiples of len4. Note the order:
// ri_1 lies two quarters away from ri_0, ri_2 a single quarter.
59 const unsigned ri_0 = bitRevBuf[idx >> 2];
60 const unsigned ri_1 = ri_0 + 2 * len4;
61 const unsigned ri_2 = ri_0 + 1 * len4;
62 const unsigned ri_3 = ri_0 + 3 * len4;
// Odd outputs: plain differences of the two input pairs.
64 dst[idx + 1] = input[ri_0] - input[ri_1];
65 dst[idx + 3] = input[ri_2] - input[ri_3];
// Even outputs: sum and difference of the two pair sums.
67 const float sf_0 = input[ri_0] + input[ri_1];
68 const float sf_2 = input[ri_2] + input[ri_3];
70 dst[idx + 0] = sf_0 + sf_2;
71 dst[idx + 2] = sf_0 - sf_2;
74 }
else if constexpr (PASS == 2) {
// Stage 2: let stage 1 fill `src` first (note the swapped buffer arguments).
76 pass<1>(src, dst, input);
// sqrt(2) / 2, the only multiplicative factor this stage uses.
79 static constexpr float sqrt2_2 = std::numbers::sqrt2_v<float> * 0.5f;
// Each iteration combines one group of eight elements.
80 for (
unsigned idx = 0; idx <
FFT_LEN; idx += 8) {
// Elements 0 and 4 become their sum and difference; 2 and 6 are copied.
81 dst[idx + 0] = src[idx] + src[idx + 4];
82 dst[idx + 4] = src[idx] - src[idx + 4];
83 dst[idx + 2] = src[idx + 2];
84 dst[idx + 6] = src[idx + 6];
// Scaled difference of elements 5 and 7, added to / subtracted from element 1.
86 float v1 = (src[idx + 5] - src[idx + 7]) * sqrt2_2;
87 dst[idx + 1] = src[idx + 1] + v1;
88 dst[idx + 3] = src[idx + 1] - v1;
// Scaled sum of elements 5 and 7, combined with element 3.
90 float v2 = (src[idx + 5] + src[idx + 7]) * sqrt2_2;
91 dst[idx + 5] = v2 + src[idx + 3];
92 dst[idx + 7] = v2 - src[idx + 3];
// Stages other than 1 and 2: run the stage below into `src` first (buffers
// swapped), then combine its output into `dst`.
97 pass<PASS - 1>(src, dst, input);
// Layout of one work block of cend = 4 * dist elements, as four runs of
// `dist` elements each, starting at c1_r, c1_i, c2_r and c2_i.
// NOTE(review): the _r / _i suffixes presumably denote real and imaginary
// parts of the two half-blocks being merged -- confirm.
99 static constexpr unsigned dist = 1 << (PASS - 1);
100 static constexpr unsigned c1_r = 0;
101 static constexpr unsigned c1_i = dist * 1;
102 static constexpr unsigned c2_r = dist * 2;
103 static constexpr unsigned c2_i = dist * 3;
104 static constexpr unsigned cend = dist * 4;
// Stride through cosBuf for this stage, chosen so that
// dist * table_step == COS_ARR_SIZE.
105 static constexpr unsigned table_step = COS_ARR_SIZE >> (PASS - 1);
// Walk over the buffers one block of cend elements at a time.
107 for (
unsigned idx = 0; idx <
FFT_LEN; idx += cend) {
// Views of the current block; all indices below are relative to idx.
108 std::span<const float> sf = src.subspan(idx);
109 std::span< float> df = dst.subspan(idx);
// First element of each half: only a sum and a difference, no multiply.
112 df[c1_r] = sf[c1_r] + sf[c2_r];
113 df[c2_r] = sf[c1_r] - sf[c2_r];
// Remaining elements, i = 1 .. dist - 1.
118 for (
unsigned i = 1; i < dist; ++ i) {
// c is read from the table going forward and s going backward from the end.
// Given that cosBuf[k] holds cos(k * (pi / 2) / COS_ARR_SIZE), the backward
// entry equals the sine of the angle whose cosine is c.
119 const float c = cosBuf[ i * table_step];
120 const float s = cosBuf[(dist - i) * table_step];
122 const float sf_r_i = sf[c1_r + i];
123 const float sf_i_i = sf[c1_i + i];
// v1 and v2 are the pair (sf[c2_r + i], sf[c2_i + i]) rotated by (c, s).
// v1 is added to and subtracted from sf_r_i; the difference is stored at the
// mirrored position c2_r - i.
125 const float v1 = sf[c2_r + i] * c - sf[c2_i + i] * s;
126 df[c1_r + i] = sf_r_i + v1;
127 df[c2_r - i] = sf_r_i - v1;
// v2 is combined with sf_i_i; the difference is stored mirrored from the
// end of the block at cend - i.
129 const float v2 = sf[c2_r + i] * s + sf[c2_i + i] * c;
130 df[c2_r + i] = v2 + sf_i_i;
131 df[cend - i] = v2 - sf_i_i;
// Constant table with FFT_LEN / 4 entries of 16 bits each, one per group of
// four elements. Stage 1 of pass() indexes it with (idx >> 2) to obtain the
// base read position in the input.
// NOTE(review): judging by the name, each entry is the bit-reversed position
// of its group -- confirm against the full loop body.
137 static constexpr auto bitRevBuf = []{
138 constexpr int BR_ARR_SIZE =
FFT_LEN / 4;
139 std::array<uint16_t, BR_ARR_SIZE> result = {};
// One table entry per group of four elements.
140 for (
unsigned cnt = 0; cnt < result.size(); ++cnt) {
// Position of the first element of group cnt.
141 unsigned index = cnt << 2;
// Runs once per address bit, i.e. FFT_LEN_L2 iterations.
143 for (
int bit_cnt = FFT_LEN_L2; bit_cnt > 0; --bit_cnt) {
// Store the computed position as a 16-bit value.
// NOTE(review): narrow<> is a project helper, presumably a checked narrowing
// conversion -- confirm.
148 result[cnt] = narrow<uint16_t>(res);
// Number of entries in the cosBuf table: a quarter of the transform length.
// pass() derives its per-stage table stride from this value.
153 static constexpr int COS_ARR_SIZE =
FFT_LEN / 4;
154 static constexpr auto cosBuf = []{
155 std::array<float, COS_ARR_SIZE> result = {};
156 const double mul = (0.5 * std::numbers::pi) / COS_ARR_SIZE;
157 for (
unsigned i = 0; i < result.size(); ++i) {
158 result[i] = float(cstd::cos<4>(i * mul));