30 #include <emmintrin.h>
39 #include "ResampleCoeffs.ii"
57 void getCoeffs(
double ratio, int16_t*& permute,
float*&
table,
unsigned& filterLen);
67 static Table calcTable(
double ratio, int16_t* permute,
unsigned& filterLen);
76 std::vector<Element> cache;
79 ResampleCoeffs::~ResampleCoeffs()
81 assert(cache.empty());
87 return resampleCoeffs;
91 double ratio, int16_t*& permute,
float*&
table,
unsigned& filterLen)
93 if (
auto it =
ranges::find_if(cache, [=](
auto& e) {
return e.ratio == ratio; });
95 permute = it->permute.data();
97 filterLen = it->filterLen;
105 elem.table = calcTable(ratio, elem.permute.data(), elem.filterLen);
106 permute = elem.permute.data();
107 table = elem.table.data();
108 filterLen = elem.filterLen;
109 cache.push_back(std::move(elem));
115 [=](
const Element& e) {
return e.ratio == ratio; });
117 if (it->count == 0) {
230 constexpr
unsigned N1 =
N - 1;
231 constexpr
unsigned N2 =
N / 2;
233 static constexpr
unsigned mapIdx(
unsigned x)
239 static constexpr std::pair<unsigned, unsigned> next(
unsigned x,
unsigned step)
244 static void calcPermute(
double ratio, int16_t* permute)
246 double r2 = ratio *
N;
247 double fract = r2 - floor(r2);
248 unsigned step = floor(r2);
261 std::fill_n(permute,
N2, -1);
264 unsigned restart = incr ? 0 :
N2 - 1;
265 unsigned curr = restart;
269 std::tie(nxt1, nxt2) = next(i,
step);
270 if ((nxt1 == i) || (nxt2 == i)) { curr = i;
break; }
273 for (
unsigned i =
N2 - 1; int(i) >= 0; --i) {
274 std::tie(nxt1, nxt2) = next(i,
step);
275 if ((nxt1 == i) || (nxt2 == i)) { curr = i;
break; }
282 assert(permute[curr] == -1);
284 permute[curr] = cnt++;
286 std::tie(nxt1, nxt2) = next(curr,
step);
287 if (permute[nxt1] == -1) {
290 }
else if (permute[nxt2] == -1) {
296 if (cnt ==
N2)
break;
299 while (permute[restart] != -1) {
302 assert(restart !=
N2);
304 assert(restart != 0);
312 int16_t testPerm[
N2];
314 assert(std::is_permutation(permute, permute +
N2, testPerm));
318 static constexpr
double getCoeff(
FilterIndex index)
320 double fraction = index.fractionAsDouble();
321 int indx = index.toInt();
322 return double(
coeffs[indx]) +
323 fraction * (double(
coeffs[indx + 1]) - double(
coeffs[indx]));
326 ResampleCoeffs::Table ResampleCoeffs::calcTable(
327 double ratio, int16_t* permute,
unsigned& filterLen)
329 calcPermute(ratio, permute);
332 double normFactor = floatIncr /
INDEX_INC;
336 int min_idx = -maxFilterIndex.divAsInt(increment);
337 int max_idx = 1 + (maxFilterIndex - (increment -
FilterIndex(floatIncr))).divAsInt(increment);
338 int idx_cnt = max_idx - min_idx + 1;
339 filterLen = (idx_cnt + 3) & ~3;
340 min_idx -= (filterLen - idx_cnt) / 2;
345 float* tab = &
table[permute[
t] * filterLen];
346 double lastPos = (double(
t) + 0.5) /
TAB_LEN;
350 int coeffCount = (maxFilterIndex - filterIndex).divAsInt(increment);
351 filterIndex += increment * coeffCount;
352 int bufIndex = -coeffCount;
354 tab[bufIndex - min_idx] =
355 float(getCoeff(filterIndex) * normFactor);
356 filterIndex -= increment;
360 filterIndex = increment - startFilterIndex;
361 coeffCount = (maxFilterIndex - filterIndex).divAsInt(increment);
362 filterIndex += increment * coeffCount;
363 bufIndex = 1 + coeffCount;
365 tab[bufIndex - min_idx] =
366 float(getCoeff(filterIndex) * normFactor);
367 filterIndex -= increment;
375 template<
unsigned CHANNELS>
379 , hostClock(hostClock_)
380 , ratio(float(hostClock.getPeriod().toDouble() / getEmuClock().getPeriod().toDouble()))
385 unsigned extra = int(filterLen + 1 + ratio + 1);
389 unsigned initialSize = 4000;
390 buffer.resize((initialSize + extra) *
CHANNELS);
393 template<
unsigned CHANNELS>
400 template<
bool REVERSE>
401 static inline void calcSseMono(
const float* buf_,
const float* tab_,
size_t len,
float* out)
403 assert((len % 4) == 0);
404 assert((uintptr_t(tab_) % 16) == 0);
406 ptrdiff_t
x = (len & ~7) *
sizeof(
float);
407 assert((
x % 32) == 0);
408 const char* buf =
reinterpret_cast<const char*
>(buf_) +
x;
409 const char* tab =
reinterpret_cast<const char*
>(tab_) + (REVERSE ? -
x :
x);
412 __m128 a0 = _mm_setzero_ps();
413 __m128 a1 = _mm_setzero_ps();
415 __m128 b0 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf +
x + 0));
416 __m128 b1 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf +
x + 16));
418 if constexpr (REVERSE) {
419 t0 = _mm_loadr_ps(
reinterpret_cast<const float*
>(tab -
x - 16));
420 t1 = _mm_loadr_ps(
reinterpret_cast<const float*
>(tab -
x - 32));
422 t0 = _mm_load_ps (
reinterpret_cast<const float*
>(tab +
x + 0));
423 t1 = _mm_load_ps (
reinterpret_cast<const float*
>(tab +
x + 16));
425 __m128 m0 = _mm_mul_ps(b0, t0);
426 __m128 m1 = _mm_mul_ps(b1, t1);
427 a0 = _mm_add_ps(a0, m0);
428 a1 = _mm_add_ps(a1, m1);
429 x += 2 *
sizeof(__m128);
432 __m128 b0 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf));
434 if constexpr (REVERSE) {
435 t0 = _mm_loadr_ps(
reinterpret_cast<const float*
>(tab - 16));
437 t0 = _mm_load_ps (
reinterpret_cast<const float*
>(tab));
439 __m128 m0 = _mm_mul_ps(b0, t0);
440 a0 = _mm_add_ps(a0, m0);
443 __m128 a = _mm_add_ps(a0, a1);
446 __m128
t = _mm_add_ps(a, _mm_movehl_ps(a, a));
447 __m128 s = _mm_add_ss(
t, _mm_shuffle_ps(
t,
t, 1));
449 _mm_store_ss(out, s);
452 template<
int N>
static inline __m128 shuffle(__m128
x)
454 return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(
x),
N));
456 template<
bool REVERSE>
457 static inline void calcSseStereo(
const float* buf_,
const float* tab_,
size_t len,
float* out)
459 assert((len % 4) == 0);
460 assert((uintptr_t(tab_) % 16) == 0);
462 ptrdiff_t
x = 2 * (len & ~7) *
sizeof(
float);
463 const char* buf =
reinterpret_cast<const char*
>(buf_) +
x;
464 const char* tab =
reinterpret_cast<const char*
>(tab_);
467 __m128 a0 = _mm_setzero_ps();
468 __m128 a1 = _mm_setzero_ps();
469 __m128 a2 = _mm_setzero_ps();
470 __m128 a3 = _mm_setzero_ps();
472 __m128 b0 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf +
x + 0));
473 __m128 b1 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf +
x + 16));
474 __m128 b2 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf +
x + 32));
475 __m128 b3 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf +
x + 48));
477 if constexpr (REVERSE) {
478 ta = _mm_loadr_ps(
reinterpret_cast<const float*
>(tab - 16));
479 tb = _mm_loadr_ps(
reinterpret_cast<const float*
>(tab - 32));
480 tab -= 2 *
sizeof(__m128);
482 ta = _mm_load_ps (
reinterpret_cast<const float*
>(tab + 0));
483 tb = _mm_load_ps (
reinterpret_cast<const float*
>(tab + 16));
484 tab += 2 *
sizeof(__m128);
486 __m128 t0 = shuffle<0x50>(ta);
487 __m128 t1 = shuffle<0xFA>(ta);
488 __m128 t2 = shuffle<0x50>(tb);
489 __m128 t3 = shuffle<0xFA>(tb);
490 __m128 m0 = _mm_mul_ps(b0, t0);
491 __m128 m1 = _mm_mul_ps(b1, t1);
492 __m128 m2 = _mm_mul_ps(b2, t2);
493 __m128 m3 = _mm_mul_ps(b3, t3);
494 a0 = _mm_add_ps(a0, m0);
495 a1 = _mm_add_ps(a1, m1);
496 a2 = _mm_add_ps(a2, m2);
497 a3 = _mm_add_ps(a3, m3);
498 x += 4 *
sizeof(__m128);
501 __m128 b0 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf + 0));
502 __m128 b1 = _mm_loadu_ps(
reinterpret_cast<const float*
>(buf + 16));
504 if constexpr (REVERSE) {
505 ta = _mm_loadr_ps(
reinterpret_cast<const float*
>(tab - 16));
507 ta = _mm_load_ps (
reinterpret_cast<const float*
>(tab + 0));
509 __m128 t0 = shuffle<0x50>(ta);
510 __m128 t1 = shuffle<0xFA>(ta);
511 __m128 m0 = _mm_mul_ps(b0, t0);
512 __m128 m1 = _mm_mul_ps(b1, t1);
513 a0 = _mm_add_ps(a0, m0);
514 a1 = _mm_add_ps(a1, m1);
517 __m128 a01 = _mm_add_ps(a0, a1);
518 __m128 a23 = _mm_add_ps(a2, a3);
519 __m128 a = _mm_add_ps(a01, a23);
521 __m128 s = _mm_add_ps(a, _mm_movehl_ps(a, a));
522 _mm_store_ss(&out[0], s);
523 _mm_store_ss(&out[1], shuffle<0x55>(s));
528 template<
unsigned CHANNELS>
529 void ResampleHQ<CHANNELS>::calcOutput(
530 float pos,
float* __restrict output)
532 assert((filterLen & 3) == 0);
534 int bufIdx = int(pos) + bufStart;
535 assert((bufIdx + filterLen) <= bufEnd);
537 const float* buf = &buffer[bufIdx];
543 const float* tab = &
table[
t * filterLen];
547 calcSseMono <false>(buf, tab, filterLen, output);
549 calcSseStereo<false>(buf, tab, filterLen, output);
560 for (
unsigned i = 0; i < filterLen; i += 4) {
561 r0 += tab[i + 0] * buf[
CHANNELS * (i + 0)];
562 r1 += tab[i + 1] * buf[
CHANNELS * (i + 1)];
563 r2 += tab[i + 2] * buf[
CHANNELS * (i + 2)];
564 r3 += tab[i + 3] * buf[
CHANNELS * (i + 3)];
566 output[ch] = r0 + r1 + r2 + r3;
572 const float* tab = &
table[(
t + 1) * filterLen];
576 calcSseMono <true>(buf, tab, filterLen, output);
578 calcSseStereo<true>(buf, tab, filterLen, output);
589 for (
int i = 0; i < int(filterLen); i += 4) {
590 r0 += tab[-i - 1] * buf[
CHANNELS * (i + 0)];
591 r1 += tab[-i - 2] * buf[
CHANNELS * (i + 1)];
592 r2 += tab[-i - 3] * buf[
CHANNELS * (i + 2)];
593 r3 += tab[-i - 4] * buf[
CHANNELS * (i + 3)];
595 output[ch] = r0 + r1 + r2 + r3;
601 template<
unsigned CHANNELS>
602 void ResampleHQ<CHANNELS>::prepareData(
unsigned emuNum)
605 unsigned free = unsigned(buffer.size() /
CHANNELS) - bufEnd;
609 unsigned available = bufEnd - bufStart;
610 memmove(&buffer[0], &buffer[bufStart *
CHANNELS],
611 available *
CHANNELS *
sizeof(
float));
615 free = unsigned(buffer.size() /
CHANNELS) - bufEnd;
616 int missing = emuNum - free;
625 buffer.resize(buffer.size() + missing *
CHANNELS);
629 if (input.generateInput(tmpBuf, emuNum)) {
630 memcpy(&buffer[bufEnd *
CHANNELS], tmpBuf,
633 nonzeroSamples = bufEnd - bufStart;
635 memset(&buffer[bufEnd *
CHANNELS], 0,
640 assert(bufStart <= bufEnd);
641 assert(bufEnd <= (buffer.size() /
CHANNELS));
644 template<
unsigned CHANNELS>
646 float* __restrict dataOut,
unsigned hostNum, EmuTime::param time)
648 auto& emuClk = getEmuClock();
649 unsigned emuNum = emuClk.getTicksTill(time);
654 bool notMuted = nonzeroSamples > 0;
657 EmuTime host1 = hostClock.getFastAdd(1);
658 assert(host1 > emuClk.getTime());
659 float pos = emuClk.getTicksTillDouble(host1);
660 assert(pos <= (ratio + 2));
661 for (
auto i :
xrange(hostNum)) {
662 calcOutput(pos, &dataOut[i *
CHANNELS]);
668 nonzeroSamples = std::max<int>(0, nonzeroSamples - emuNum);
670 assert(bufStart <= bufEnd);
671 unsigned available = bufEnd - bufStart;
672 unsigned extra = int(filterLen + 1 + ratio + 1);
673 assert(available == extra); (void)available; (void)extra;
constexpr unsigned CHANNELS
Represents a clock with a variable frequency.
const T * data() const
Returns pointer to the start of the memory buffer.
static ResampleCoeffs & instance()
ResampleCoeffs(const ResampleCoeffs &)=delete
ResampleCoeffs & operator=(const ResampleCoeffs &)=delete
void getCoeffs(double ratio, int16_t *&permute, float *&table, unsigned &filterLen)
void releaseCoeffs(double ratio)
bool generateOutputImpl(float *dataOut, unsigned num, EmuTime::param time) override
ResampleHQ(ResampledSoundDevice &input, const DynamicClock &hostClock)
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
This file implemented 3 utility functions:
constexpr int COEFF_HALF_LEN
FixedPoint< 16 > FilterIndex
constexpr unsigned TAB_LEN
constexpr unsigned HALF_TAB_LEN
constexpr KeyMatrixPosition x
Keyboard bindings.
auto find_if(InputRange &&range, UnaryPredicate pred)
constexpr void iota(ForwardIt first, ForwardIt last, T value)
size_t size(std::string_view utf8)
void move_pop_back(VECTOR &v, typename VECTOR::iterator it)
Erase the pointed to element from the given vector.
constexpr auto rfind_if_unguarded(RANGE &range, PRED pred)
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
constexpr auto xrange(T e)
constexpr auto end(const zstring_view &x)