39 static_assert(FFT_LEN_L2 >= 3);
40 static_assert(FFT_LEN_L2 <= 16);
41 static constexpr unsigned FFT_LEN = 1 << FFT_LEN_L2;
43 static void execute(std::span<const float, FFT_LEN> input,
44 std::span<float, FFT_LEN> output,
45 std::span<float, FFT_LEN> tmpBuf)
47 pass<FFT_LEN_L2 - 1>(output, tmpBuf, input);
52 static void pass(std::span<float, FFT_LEN> dst,
53 std::span<float, FFT_LEN> src,
54 std::span<const float, FFT_LEN> input)
56 if constexpr (PASS == 1) {
58 static constexpr auto len4 =
FFT_LEN >> 2;
59 for (
unsigned idx = 0; idx <
FFT_LEN; idx += 4) {
60 const unsigned ri_0 = bitRevBuf[idx >> 2];
61 const unsigned ri_1 = ri_0 + 2 * len4;
62 const unsigned ri_2 = ri_0 + 1 * len4;
63 const unsigned ri_3 = ri_0 + 3 * len4;
65 dst[idx + 1] = input[ri_0] - input[ri_1];
66 dst[idx + 3] = input[ri_2] - input[ri_3];
68 const float sf_0 = input[ri_0] + input[ri_1];
69 const float sf_2 = input[ri_2] + input[ri_3];
71 dst[idx + 0] = sf_0 + sf_2;
72 dst[idx + 2] = sf_0 - sf_2;
75 }
else if constexpr (PASS == 2) {
77 pass<1>(src, dst, input);
80 static constexpr float sqrt2_2 = std::numbers::sqrt2_v<float> * 0.5f;
81 for (
unsigned idx = 0; idx <
FFT_LEN; idx += 8) {
82 dst[idx + 0] = src[idx] + src[idx + 4];
83 dst[idx + 4] = src[idx] - src[idx + 4];
84 dst[idx + 2] = src[idx + 2];
85 dst[idx + 6] = src[idx + 6];
87 float v1 = (src[idx + 5] - src[idx + 7]) * sqrt2_2;
88 dst[idx + 1] = src[idx + 1] + v1;
89 dst[idx + 3] = src[idx + 1] - v1;
91 float v2 = (src[idx + 5] + src[idx + 7]) * sqrt2_2;
92 dst[idx + 5] = v2 + src[idx + 3];
93 dst[idx + 7] = v2 - src[idx + 3];
98 pass<PASS - 1>(src, dst, input);
100 static constexpr unsigned dist = 1 << (PASS - 1);
101 static constexpr unsigned c1_r = 0;
102 static constexpr unsigned c1_i = dist * 1;
103 static constexpr unsigned c2_r = dist * 2;
104 static constexpr unsigned c2_i = dist * 3;
105 static constexpr unsigned cend = dist * 4;
106 static constexpr unsigned table_step = COS_ARR_SIZE >> (PASS - 1);
108 for (
unsigned idx = 0; idx <
FFT_LEN; idx += cend) {
109 std::span<const float> sf = src.subspan(idx);
110 std::span< float> df = dst.subspan(idx);
113 df[c1_r] = sf[c1_r] + sf[c2_r];
114 df[c2_r] = sf[c1_r] - sf[c2_r];
119 for (
unsigned i = 1; i < dist; ++ i) {
120 const float c = cosBuf[ i * table_step];
121 const float s = cosBuf[(dist - i) * table_step];
123 const float sf_r_i = sf[c1_r + i];
124 const float sf_i_i = sf[c1_i + i];
126 const float v1 = sf[c2_r + i] * c - sf[c2_i + i] * s;
127 df[c1_r + i] = sf_r_i + v1;
128 df[c2_r - i] = sf_r_i - v1;
130 const float v2 = sf[c2_r + i] * s + sf[c2_i + i] * c;
131 df[c2_r + i] = v2 + sf_i_i;
132 df[cend - i] = v2 - sf_i_i;
// Compile-time lookup table read by pass<1> as bitRevBuf[idx >> 2]: one
// entry for every group of four output elements.
// NOTE(review): several statements of this initializer (the loop that
// declares `cnt`, the declaration/update of `res`, and the end of the
// lambda) are not visible in this excerpt, so only the visible lines are
// described here.
138 static constexpr auto bitRevBuf = []{
// FFT_LEN / 4 entries, matching the idx >> 2 indexing used by pass<1>.
139 constexpr int BR_ARR_SIZE =
FFT_LEN / 4;
// Entries are uint16_t; each value is narrowed to 16 bits when stored.
140 std::array<uint16_t, BR_ARR_SIZE> result = {};
// Entry `cnt` is derived from cnt * 4, i.e. the index of the first element
// of group `cnt`.
142 unsigned index = cnt << 2;
// Runs FFT_LEN_L2 times, once per bit of an FFT_LEN-sized index.
// Presumably this accumulates the bit-reversed value of `index` in `res` --
// confirm against the full loop body.
144 for (
int bit_cnt = FFT_LEN_L2; bit_cnt > 0; --bit_cnt) {
// Store the value computed for this group. `narrow` is a project helper;
// presumably a checked narrowing conversion -- TODO confirm.
149 result[cnt] = narrow<uint16_t>(res);
// Number of entries in cosBuf: a quarter of the transform length.
// pass<PASS> derives its table stride from it as COS_ARR_SIZE >> (PASS - 1).
154 static constexpr int COS_ARR_SIZE =
FFT_LEN / 4;
155 static constexpr auto cosBuf = []{
156 std::array<float, COS_ARR_SIZE> result = {};
157 const double mul = (0.5 * std::numbers::pi) / COS_ARR_SIZE;
159 result[i] = float(cstd::cos<4>(i * mul));