15 #if ASM_X86 && defined _MSC_VER
19 #include <emmintrin.h>
25 #if ASM_X86_32 && defined _MSC_VER
30 [[nodiscard]]
static inline __m128i _mm_set1_epi64x(uint64_t val)
32 uint32_t low = val >> 32;
33 uint32_t high = val >> 0;
34 return _mm_set_epi32(low, high, low, high);
// SSE2 helper: writes the 64-bit pattern `val64` to the buffer at `out` using
// aligned 128-bit stores (two 64-bit words per store).
// NOTE(review): this listing is incomplete -- the leading integers are line
// numbers carried over from the original file, and the lines in the gaps
// (40-48, 54-55, 57 onwards) are not shown. The comments below describe only
// the statements that are visible.
38 static inline void memset_64_SSE(
39 uint64_t* out,
size_t num64, uint64_t val64)
// Replicate val64 into both 64-bit lanes of a 128-bit register.
49 __m128i val128 = _mm_set1_epi64x(val64);
// `e` lies 3 words short of the end, so `out < e` holds exactly while at
// least 4 words remain to be written.
// NOTE(review): for num64 < 3 this expression forms a pointer before `out`,
// which is out-of-range pointer arithmetic even though the loop is then
// skipped -- confirm whether the lines not shown here guard against it.
50 uint64_t* e = out + num64 - 3;
// Main loop, unrolled: 2 stores x 2 words = 4 words per iteration.
51 for (; out < e; out += 4) {
// _mm_store_si128 is the aligned store; its address must be 16-byte aligned.
52 _mm_store_si128(
reinterpret_cast<__m128i*
>(out + 0), val128);
53 _mm_store_si128(
reinterpret_cast<__m128i*
>(out + 2), val128);
// One more 128-bit store at the current position -- presumably the 2-word
// tail; the condition guarding it is not visible in this listing.
56 _mm_store_si128(
reinterpret_cast<__m128i*
>(out), val128);
// Writes the 64-bit pattern `val64` to the buffer at `out`.
// NOTE(review): only part of this function is visible here (the leading
// integers are original line numbers; the lines in between are not shown).
65 static inline void memset_64(
66 uint64_t* out,
size_t num64, uint64_t val64)
// Precondition: `out` must be 8-byte aligned.
68 assert((
size_t(out) % 8) == 0);
// SSE2 path: hand the whole request to memset_64_SSE(). Presumably selected
// by a preprocessor conditional that is not visible here -- TODO confirm.
71 memset_64_SSE(out, num64, val64);
// Alternative plain-C++ path: `out < e` holds while at least 4 words remain,
// and the loop advances 4 words per iteration (loop body not shown).
74 uint64_t* e = out + num64 - 3;
75 for (; out < e; out += 4) {
// Writes the alternating 32-bit pattern (val0, val1) to the buffer at `out`
// by combining the pair into one 64-bit word and delegating to memset_64().
// NOTE(review): only part of this function is visible here (the leading
// integers are original line numbers; lines 95-102, 106-107 and 109+ are not
// shown).
91 static inline void memset_32_2(
92 uint32_t* out,
size_t num32, uint32_t val0, uint32_t val1)
// Precondition: `out` must be 4-byte aligned.
94 assert((
size_t(out) % 4) == 0);
// Build the 64-bit word so that val0 ends up at the lower address and val1
// at the higher one: on big-endian that means val0 in the upper 32 bits, on
// little-endian val0 in the lower 32 bits.
103 uint64_t val64 = OPENMSX_BIGENDIAN ? (uint64_t(val0) << 32) | val1
104 : val0 | (uint64_t(val1) << 32);
// num32 / 2 rounds down, so an odd trailing element is not covered by this
// call. memset_64() asserts 8-byte alignment while this function only asserts
// 4-byte alignment; presumably the lines not shown align `out` first -- TODO
// confirm.
105 memset_64(
reinterpret_cast<uint64_t*
>(out), num32 / 2, val64);
// Stores val0 in the last element -- presumably only when one element is
// left over; the guarding condition is not visible here.
108 out[num32 - 1] = val0;
// Writes the 32-bit value `val32` to the buffer at `out`.
// NOTE(review): only part of this function is visible here (the leading
// integers are original line numbers). Three alternative implementations can
// be seen below; the preprocessor conditionals that presumably choose between
// them are not shown.
112 static inline void memset_32(uint32_t* out,
size_t num32, uint32_t val32)
// Precondition: `out` must be 4-byte aligned.
114 assert((
size_t(out) % 4) == 0);
// Variant 1: the __stosd compiler intrinsic (MSVC), which stores `val32`
// `num32` times starting at `out`.
121 __stosd(
reinterpret_cast<unsigned long*
>(out), val32, num32);
// Variant 2: reuse the two-value fill with both values equal.
123 memset_32_2(out, num32, val32, val32);
// Variant 3: plain loop advancing 8 elements per iteration; `out < e` holds
// while at least 8 elements remain (loop body not shown).
126 uint32_t* e = out + num32 - 7;
127 for (; out < e; out += 8) {
// Writes the alternating 16-bit pattern (val0, val1) to the buffer at `out`
// by combining the pair into one 32-bit word and delegating to memset_32().
// NOTE(review): only part of this function is visible here (the leading
// integers are original line numbers; lines 157-165, 169-170 and 172+ are not
// shown).
155 static inline void memset_16_2(
156 uint16_t* out,
size_t num16, uint16_t val0, uint16_t val1)
// Build the 32-bit word so that val0 ends up at the lower address and val1
// at the higher one: upper 16 bits on big-endian, lower 16 bits on
// little-endian.
166 uint32_t val32 = OPENMSX_BIGENDIAN ? (uint32_t(val0) << 16) | val1
167 : val0 | (uint32_t(val1) << 16);
// num16 / 2 rounds down, so an odd trailing element is not covered by this
// call. memset_32() asserts 4-byte alignment of its pointer; presumably the
// lines not shown take care of that -- TODO confirm.
168 memset_32(
reinterpret_cast<uint32_t*
>(out), num16 / 2, val32);
// Stores val0 in the last element -- presumably only when one element is
// left over; the guarding condition is not visible here.
171 out[num16 - 1] = val0;
// Writes the 16-bit value `val16` to the buffer at `out` by calling the
// two-value fill memset_16_2() with both values equal.
// (The leading integers are original line numbers; the brace lines are not
// part of this listing.)
175 static inline void memset_16(uint16_t* out,
size_t num16, uint16_t val16)
177 memset_16_2(out, num16, val16, val16);
183 if (
sizeof(
Pixel) == 2) {
184 memset_16(
reinterpret_cast<uint16_t*
>(out), num, val);
185 }
else if (
sizeof(
Pixel) == 4) {
186 memset_32(
reinterpret_cast<uint32_t*
>(out), num, val);
195 if (
sizeof(
Pixel) == 2) {
196 memset_16_2(
reinterpret_cast<uint16_t*
>(out), num, val0, val1);
197 }
else if (
sizeof(
Pixel) == 4) {
198 memset_32_2(
reinterpret_cast<uint32_t*
>(out), num, val0, val1);
// Records the pointer pair (aligned, unaligned) by appending it to allocMap.
// A null `aligned` pointer is ignored.
// NOTE(review): the leading integers are original line numbers; line 229 and
// the closing brace are not part of this listing.
227 void insert(
void* aligned,
void* unaligned) {
// Nothing to record for a null pointer.
228 if (!aligned)
return;
230 allocMap.emplace_back(aligned, unaligned);
234 if (!aligned)
return nullptr;
237 EqualTupleValue<0>(aligned));
239 void* unaligned = it->second;
247 assert(allocMap.empty());
251 std::vector<std::pair<void*, void*>> allocMap;
// Body fragment of the aligned allocation routine (the function header is not
// part of this listing; the leading integers are original line numbers and
// the lines in the gaps are not shown).
//
// Preconditions: `alignment` is a power of two and at least sizeof(void*).
256 assert(
"must be a power of 2" &&
Math::ispow2(alignment));
257 assert(alignment >=
sizeof(
void*));
// Path 1: POSIX. posix_memalign() returns non-zero on failure, which is
// reported as std::bad_alloc.
258 #if HAVE_POSIX_MEMALIGN
259 void* aligned =
nullptr;
260 if (posix_memalign(&aligned, alignment,
size)) {
261 throw std::bad_alloc();
// Path 2: MSVC. A null result is only treated as failure for a non-zero
// request size.
267 #elif defined _MSC_VER
268 void* result = _aligned_malloc(
size, alignment);
269 if (!result &&
size)
throw std::bad_alloc();
// Remaining path (presumably the #else branch -- not visible here):
// over-allocate by alignment-1 bytes with plain malloc() ...
272 auto t = alignment - 1;
273 void* unaligned = malloc(
size +
t);
// NOTE(review): the condition guarding this throw is not shown; presumably a
// failed malloc().
275 throw std::bad_alloc();
// ... then round the address up to the next multiple of `alignment`. The
// `(p + t) & ~t` mask only works because `alignment` is a power of two.
277 auto aligned =
reinterpret_cast<void*
>(
278 (
reinterpret_cast<size_t>(unaligned) +
t) & ~
t);
286 #if HAVE_POSIX_MEMALIGN
291 #elif defined _MSC_VER
292 return _aligned_free(aligned);
Aligned memory (de)allocation.
static AllocMap & instance()
AllocMap(const AllocMap &)=delete
void insert(void *, void *unaligned)
AllocMap & operator=(const AllocMap &)=delete
constexpr bool ispow2(T x) noexcept
Is the given number an integral power of two? That is, does it have exactly one 1-bit in its binary representation?
void * mallocAligned(size_t alignment, size_t size)
bool none_of(InputRange &&range, UnaryPredicate pred)
size_t size(std::string_view utf8)
void move_pop_back(VECTOR &v, typename VECTOR::iterator it)
Erase the pointed to element from the given vector.
constexpr auto rfind_if_unguarded(RANGE &range, PRED pred)
void operator()(Pixel *out, size_t num, Pixel val0, Pixel val1) const
void operator()(Pixel *out, size_t num, Pixel val) const