26#if ASM_X86_32 && defined _MSC_VER
// Broadcast a 64-bit value into both 64-bit lanes of a 128-bit SSE2 register.
//
// This is a local definition of `_mm_set1_epi64x`; it is only compiled under
// the preceding `#if ASM_X86_32 && defined _MSC_VER` guard (presumably because
// that toolchain does not supply the intrinsic itself -- TODO confirm).
//
// @param val  The 64-bit pattern to replicate.
// @return     A __m128i whose lower and upper 64-bit halves both equal `val`.
[[nodiscard]]
static inline __m128i _mm_set1_epi64x(uint64_t val)
{
	// Split the value into its two 32-bit halves. The names now match the
	// bits they hold: `upper` is bits 63..32, `lower` is bits 31..0.
	const auto upper = static_cast<uint32_t>(val >> 32);
	const auto lower = static_cast<uint32_t>(val);

	// _mm_set_epi32 takes its arguments from the most-significant 32-bit
	// element down to the least-significant one, so (upper, lower) repeated
	// twice yields `val` in each 64-bit lane.
	return _mm_set_epi32(static_cast<int>(upper), static_cast<int>(lower),
	                     static_cast<int>(upper), static_cast<int>(lower));
}
// Fill `num64` consecutive 64-bit words starting at `out` with `val64`,
// using 128-bit SSE2 stores (_mm_store_si128).
//
// NOTE(review): this listing appears to have lost lines (the embedded line
// numbers jump, e.g. 42 -> 45 -> 50 and 57 -> 60), so the bodies of some
// branches and the closing braces are not visible here.
39static inline void memset_64_SSE(
40 uint64_t* out,
size_t num64, uint64_t val64)
// Nothing to write for an empty range.
42 if (num64 == 0) [[unlikely]]
return;
// True when bit 3 of the address is set, i.e. `out` is not 16-byte aligned.
// _mm_store_si128 requires a 16-byte aligned destination; presumably this
// branch writes one leading word to reach that alignment -- body not
// visible, TODO confirm.
45 if (
size_t(out) & 8) [[unlikely]] {
// Replicate val64 into both 64-bit lanes of one 128-bit register.
50 __m128i val128 = _mm_set1_epi64x(narrow_cast<int64_t>(val64));
// `e` lies 3 words before the end, so the loop below only runs while at
// least 4 words remain.
51 const uint64_t* e = out + num64 - 3;
// Main loop: 4 words (two 128-bit stores) per iteration.
52 for (; out < e; out += 4) {
53 _mm_store_si128(std::bit_cast<__m128i*>(out + 0), val128);
54 _mm_store_si128(std::bit_cast<__m128i*>(out + 2), val128);
// Tail: a remaining pair of words is written with one 128-bit store.
56 if (num64 & 2) [[unlikely]] {
57 _mm_store_si128(std::bit_cast<__m128i*>(out), val128);
// Tail: a single remaining word (body not visible in this listing).
60 if (num64 & 1) [[unlikely]] {
// Fill `num64` consecutive 64-bit words starting at `out` with `val64`.
// `out` must be 8-byte aligned (checked by the assert below).
//
// NOTE(review): lines are missing from this listing (embedded numbers jump
// 69 -> 72 -> 75 and 76 -> 82 -> 87). The call to memset_64_SSE and the
// unrolled loop that follows are presumably alternative preprocessor
// branches -- TODO confirm against the full file.
66static inline void memset_64(
67 uint64_t* out,
size_t num64, uint64_t val64)
// Precondition: destination is aligned to the 8-byte word size.
69 assert((
size_t(out) % 8) == 0);
// Delegate to the SSE2 implementation.
72 memset_64_SSE(out, num64, val64);
// `e` lies 3 words before the end, so the loop only runs while at least
// 4 words remain; it advances 4 words per iteration (body not visible).
75 const uint64_t* e = out + num64 - 3;
76 for (; out < e; out += 4) {
// Tails for a remaining pair / single word (bodies not visible).
82 if (num64 & 2) [[unlikely]] {
87 if (num64 & 1) [[unlikely]] {
// Fill `num32` consecutive 32-bit words starting at `out` using the two
// values `val0` and `val1`, by packing them into one 64-bit word and
// delegating to memset_64. `out` must be 4-byte aligned.
//
// NOTE(review): lines are missing from this listing (embedded numbers jump
// 99 -> 104), so the body of the alignment branch is not visible.
92static inline void memset_32_2(
93 uint32_t* out,
size_t num32, uint32_t val0, uint32_t val1)
// Precondition: destination is aligned to the 4-byte word size.
95 assert((
size_t(out) % 4) == 0);
// Nothing to write for an empty range.
96 if (num32 == 0) [[unlikely]]
return;
// True when `out` is 4- but not 8-byte aligned. Presumably one leading
// word is written here so the 64-bit fill below starts on an 8-byte
// boundary (memset_64 asserts that) -- body not visible, TODO confirm.
99 if (
size_t(out) & 4) [[unlikely]] {
// Pack the pair so that val0 occupies the lower address of each 64-bit
// word and val1 the higher one (assuming Endian::BIG is true on
// big-endian hosts).
104 uint64_t val64 =
Endian::BIG ? (uint64_t(val0) << 32) | val1
105 : val0 | (uint64_t(val1) << 32);
// Fill all complete pairs with 64-bit writes.
106 memset_64(std::bit_cast<uint64_t*>(out), num32 / 2, val64);
// Odd count: the last word is not covered by the pair fill; it gets val0.
108 if (num32 & 1) [[unlikely]] {
109 out[num32 - 1] = val0;
// Fill `num32` consecutive 32-bit words starting at `out` with `val32`.
// `out` must be 4-byte aligned (checked by the assert below).
//
// NOTE(review): lines are missing from this listing (embedded numbers jump
// 115 -> 122 -> 124 -> 127). The three strategies visible below (__stosd,
// memset_32_2, unrolled loop) are presumably alternative preprocessor
// branches -- TODO confirm against the full file.
113static inline void memset_32(uint32_t* out,
size_t num32, uint32_t val32)
// Precondition: destination is aligned to the 4-byte word size.
115 assert((
size_t(out) % 4) == 0);
// Strategy 1: the __stosd intrinsic, given destination, value and count.
122 __stosd(std::bit_cast<unsigned long*>(out), val32, num32);
// Strategy 2: reuse the two-value fill with both values equal.
124 memset_32_2(out, num32, val32, val32);
// Strategy 3: `e` lies 7 words before the end, so the loop only runs while
// at least 8 words remain; it advances 8 words per iteration (body not
// visible).
127 uint32_t* e = out + num32 - 7;
128 for (; out < e; out += 8) {
// Tails for the remaining 4 / 2 / 1 words (bodies not visible).
138 if (num32 & 4) [[unlikely]] {
145 if (num32 & 2) [[unlikely]] {
150 if (num32 & 1) [[unlikely]] {
157 std::span<Pixel> out,
Pixel val)
const
159 if constexpr (
sizeof(
Pixel) == 4) {
160 memset_32(std::bit_cast<uint32_t*>(out.data()), out.size(), val);
167 std::span<Pixel> out,
Pixel val0,
Pixel val1)
const
169 if constexpr (
sizeof(
Pixel) == 4) {
170 memset_32_2(std::bit_cast<uint32_t*>(out.data()), out.size(), val0, val1);
177template struct MemSet <uint32_t>;
// Record an (aligned, unaligned) pointer pair in `allocMap`.
//
// @param aligned    Key of the new entry; a null pointer is ignored.
// @param unaligned  Pointer stored alongside `aligned` in the same entry.
//
// NOTE(review): the enclosing class and this function's closing brace are
// not visible in this listing.
199 void insert(
void* aligned,
void* unaligned) {
// Null pointers are never tracked.
200 if (!aligned)
return;
// The same aligned pointer must not already be registered.
201 assert(!
contains(allocMap, aligned, &Entry::aligned));
202 allocMap.emplace_back(Entry{aligned, unaligned});
206 if (!aligned)
return nullptr;
210 void* unaligned = it->unaligned;
218 assert(allocMap.empty());
226 std::vector<Entry> allocMap;
231 assert(
"must be a power of 2" && std::has_single_bit(alignment));
232 assert(alignment >=
sizeof(
void*));
233#if HAVE_POSIX_MEMALIGN
234 void* aligned =
nullptr;
235 if (posix_memalign(&aligned, alignment, size)) {
236 throw std::bad_alloc();
242#elif defined _MSC_VER
243 void* result = _aligned_malloc(size, alignment);
244 if (!result && size)
throw std::bad_alloc();
247 auto t = alignment - 1;
248 void* unaligned = malloc(size +
t);
250 throw std::bad_alloc();
252 auto aligned = std::bit_cast<void*>(
253 (std::bit_cast<uintptr_t>(unaligned) +
t) & ~
t);
261#if HAVE_POSIX_MEMALIGN
266#elif defined _MSC_VER
267 return _aligned_free(aligned);
auto rfind_unguarded(RANGE &range, const VAL &val, Proj proj={})
Similar to the find(_if)_unguarded functions above, but searches from back to front.
constexpr bool contains(ITER first, ITER last, const VAL &val)
Check if a range contains a given value, using linear search.