13#if ASM_X86 && defined _MSC_VER
23#if ASM_X86_32 && defined _MSC_VER
28[[nodiscard]]
static inline __m128i _mm_set1_epi64x(uint64_t val)
30 uint32_t low = val >> 32;
31 uint32_t high = val >> 0;
32 return _mm_set_epi32(low, high, low, high);
// Fill `num64` consecutive 64-bit words starting at `out` with `val64`,
// using 128-bit SSE2 stores.
// NOTE(review): several source lines are missing from this listing (the body
// of the alignment fix-up, the body of the final odd-word branch, closing
// braces). The comments below describe only what is visible.
36static inline void memset_64_SSE(
37 uint64_t* out,
size_t num64, uint64_t val64)
// Empty range: nothing to write.
39 if (num64 == 0) [[unlikely]]
return;
// Bit 3 of the address is set, so `out` is not on a 16-byte boundary.
// Presumably a single word is written here to reach alignment -- the branch
// body is not visible, confirm against the full file.
42 if (
size_t(out) & 8) [[unlikely]] {
// Replicate the 64-bit pattern into both halves of a 128-bit register.
47 __m128i val128 = _mm_set1_epi64x(narrow_cast<int64_t>(val64));
// Main loop: two 128-bit stores (four 64-bit words) per iteration. The `- 3`
// in the bound makes the loop stop once fewer than four words remain.
48 uint64_t*
e = out + num64 - 3;
49 for (; out <
e; out += 4) {
// _mm_store_si128 is the aligned store form: the target address must be
// 16-byte aligned.
50 _mm_store_si128(
reinterpret_cast<__m128i*
>(out + 0), val128);
51 _mm_store_si128(
reinterpret_cast<__m128i*
>(out + 2), val128);
// Bit 1 of the count set: one more 128-bit store, i.e. two more words.
53 if (num64 & 2) [[unlikely]] {
54 _mm_store_si128(
reinterpret_cast<__m128i*
>(out), val128);
// Bit 0 of the count set: a final single word remains (body not visible).
57 if (num64 & 1) [[unlikely]] {
// Fill `num64` 64-bit words starting at `out` with `val64`.
// NOTE(review): this listing shows both a call to memset_64_SSE() and a
// hand-written loop. The preprocessor lines that presumably select between
// them, and the loop/tail bodies, are not visible -- confirm in the full file.
63static inline void memset_64(
64 uint64_t* out,
size_t num64, uint64_t val64)
// Precondition: `out` is 8-byte aligned.
66 assert((
size_t(out) % 8) == 0);
// Delegate the whole fill to the SSE helper.
69 memset_64_SSE(out, num64, val64);
// Loop that advances four words per iteration; the `- 3` bound stops it once
// fewer than four words remain (loop body not visible).
72 uint64_t*
e = out + num64 - 3;
73 for (; out <
e; out += 4) {
// Tail handling keyed on bit 1 and bit 0 of the count (bodies not visible).
79 if (num64 & 2) [[unlikely]] {
84 if (num64 & 1) [[unlikely]] {
// Fill `num32` 32-bit words starting at `out` with a repeating two-value
// pattern built from `val0` and `val1`, by packing the pair into one 64-bit
// word and delegating to memset_64().
// NOTE(review): the body of the alignment fix-up branch is missing from this
// listing, so which value lands in out[0] for a misaligned `out` cannot be
// stated from here.
89static inline void memset_32_2(
90 uint32_t* out,
size_t num32, uint32_t val0, uint32_t val1)
// Precondition: `out` is 4-byte aligned.
92 assert((
size_t(out) % 4) == 0);
// Empty range: nothing to write.
93 if (num32 == 0) [[unlikely]]
return;
// Bit 2 of the address is set, so `out` is not on an 8-byte boundary
// (branch body not visible).
96 if (
size_t(out) & 4) [[unlikely]] {
// Pack the pair so that val0 ends up at the lower address and val1 at the
// higher one -- assuming Endian::BIG is true on big-endian hosts (TODO confirm).
101 uint64_t val64 =
Endian::BIG ? (uint64_t(val0) << 32) | val1
102 : val0 | (uint64_t(val1) << 32);
// Write num32 / 2 packed pairs.
103 memset_64(
reinterpret_cast<uint64_t*
>(out), num32 / 2, val64);
// Odd count: the last word is not covered by the pair fill and gets val0.
105 if (num32 & 1) [[unlikely]] {
106 out[num32 - 1] = val0;
// Fill `num32` 32-bit words starting at `out` with `val32`.
// NOTE(review): three alternatives are visible below (the __stosd intrinsic,
// forwarding to memset_32_2(), and a hand-written loop). The preprocessor
// lines that presumably choose between them, and the loop/tail bodies, are
// missing from this listing -- confirm in the full file.
110static inline void memset_32(uint32_t* out,
size_t num32, uint32_t val32)
// Precondition: `out` is 4-byte aligned.
112 assert((
size_t(out) % 4) == 0);
// MSVC intrinsic: store `val32` into `num32` consecutive 32-bit locations.
119 __stosd(
reinterpret_cast<unsigned long*
>(out), val32, num32);
// Same value for both slots of the two-value fill.
121 memset_32_2(out, num32, val32, val32);
// Loop that advances eight words per iteration; the `- 7` bound stops it once
// fewer than eight words remain (loop body not visible).
124 uint32_t*
e = out + num32 - 7;
125 for (; out <
e; out += 8) {
// Tail handling keyed on bits 2, 1 and 0 of the count (bodies not visible).
135 if (num32 & 4) [[unlikely]] {
142 if (num32 & 2) [[unlikely]] {
147 if (num32 & 1) [[unlikely]] {
// Fill `num16` 16-bit words starting at `out` with a repeating two-value
// pattern built from `val0` and `val1`, by packing the pair into one 32-bit
// word and delegating to memset_32().
// NOTE(review): the body of the alignment fix-up branch is missing from this
// listing, so which value lands in out[0] for a misaligned `out` cannot be
// stated from here.
153static inline void memset_16_2(
154 uint16_t* out,
size_t num16, uint16_t val0, uint16_t val1)
// Empty range: nothing to write.
156 if (num16 == 0) [[unlikely]]
return;
// Bit 1 of the address is set, so `out` is not on a 4-byte boundary
// (branch body not visible).
159 if (
size_t(out) & 2) [[unlikely]] {
// Pack the pair so that val0 ends up at the lower address and val1 at the
// higher one -- assuming Endian::BIG is true on big-endian hosts (TODO confirm).
164 uint32_t val32 =
Endian::BIG ? (uint32_t(val0) << 16) | val1
165 : val0 | (uint32_t(val1) << 16);
// Write num16 / 2 packed pairs.
166 memset_32(
reinterpret_cast<uint32_t*
>(out), num16 / 2, val32);
// Odd count: the last word is not covered by the pair fill and gets val0.
168 if (num16 & 1) [[unlikely]] {
169 out[num16 - 1] = val0;
173static inline void memset_16(uint16_t* out,
size_t num16, uint16_t val16)
175 memset_16_2(out, num16, val16, val16);
179 std::span<Pixel> out,
Pixel val)
const
181 if constexpr (
sizeof(
Pixel) == 2) {
182 memset_16(
reinterpret_cast<uint16_t*
>(out.data()), out.size(), val);
183 }
else if constexpr (
sizeof(
Pixel) == 4) {
184 memset_32(
reinterpret_cast<uint32_t*
>(out.data()), out.size(), val);
191 std::span<Pixel> out,
Pixel val0,
Pixel val1)
const
193 if constexpr (
sizeof(
Pixel) == 2) {
194 memset_16_2(
reinterpret_cast<uint16_t*
>(out.data()), out.size(), val0, val1);
195 }
else if constexpr (
sizeof(
Pixel) == 4) {
196 memset_32_2(
reinterpret_cast<uint32_t*
>(out.data()), out.size(), val0, val1);
225 void insert(
void* aligned,
void* unaligned) {
226 if (!aligned)
return;
227 assert(!
contains(allocMap, aligned, &Entry::aligned));
228 allocMap.emplace_back(Entry{aligned, unaligned});
232 if (!aligned)
return nullptr;
236 void* unaligned = it->unaligned;
244 assert(allocMap.empty());
252 std::vector<Entry> allocMap;
257 assert(
"must be a power of 2" && std::has_single_bit(alignment));
258 assert(alignment >=
sizeof(
void*));
259#if HAVE_POSIX_MEMALIGN
260 void* aligned =
nullptr;
261 if (posix_memalign(&aligned, alignment,
size)) {
262 throw std::bad_alloc();
268#elif defined _MSC_VER
269 void* result = _aligned_malloc(
size, alignment);
270 if (!result &&
size)
throw std::bad_alloc();
273 auto t = alignment - 1;
274 void* unaligned = malloc(
size +
t);
276 throw std::bad_alloc();
278 auto aligned =
reinterpret_cast<void*
>(
279 (
reinterpret_cast<size_t>(unaligned) +
t) & ~t);
287#if HAVE_POSIX_MEMALIGN
292#elif defined _MSC_VER
293 return _aligned_free(aligned);
Aligned memory (de)allocation.
AllocMap(const AllocMap &)=delete
static AllocMap & instance()
void insert(void *, void *unaligned)
AllocMap & operator=(const AllocMap &)=delete
void * mallocAligned(size_t alignment, size_t size)
size_t size(std::string_view utf8)
void move_pop_back(VECTOR &v, typename VECTOR::iterator it)
Erase the pointed to element from the given vector.
auto rfind_unguarded(RANGE &range, const VAL &val, Proj proj={})
Similar to the find(_if)_unguarded functions above, but searches from the back to front.
constexpr bool contains(ITER first, ITER last, const VAL &val)
Check if a range contains a given value, using linear search.
void operator()(std::span< Pixel > out, Pixel val0, Pixel val1) const
void operator()(std::span< Pixel > out, Pixel val) const