openMSX
SimpleHashSet.hh
Go to the documentation of this file.
1#ifndef SIMPLEHASHSET_HH
2#define SIMPLEHASHSET_HH
3
4#include <algorithm>
5#include <bit>
6#include <cassert>
7#include <type_traits>
8
9// SimpleHashSet
10//
11// As the name implies this class is an implementation of a simple set based on
12// hashing. Compared to std::unordered_set it has these differences/limitations:
13//
14// * There must be one special value that cannot be present in the set.
15// This special value is used to indicate empty positions in the internal
16// table. Because of this, these special values should be cheap to construct.
17// * It's not a strict requirement, but the values in the set should also be
18// cheap to destruct, preferably even trivially destructible.
19// * When inserting more elements in this set, the already present elements may
20// move around in memory (e.g. because the internal table has run out of
21// capacity). In other words elements in this set do not have stable
22// addresses. (Because elements can move around, they should preferably be
23// cheap to move (same as with std::vector)).
24// * This set does not offer a fully STL-compatible interface. Though where
25// possible it does come close. For example it's not possible to iterate over
26// all elements in this set.
27//
28// The advantage of this class over std::unordered_set are (when the above
29// limitations are fine):
30// * SimpleHashSet performs better than std::unordered_set
31// * SimpleHashSet uses less memory than std::unordered_set
32// I verified this for simple types like 'uint32_t'.
33//
34// Note: some of the above limitation could be lifted, even without sacrificing
35// performance (e.g. create a conforming STL interface). Though I intentionally
36// kept this first implementation simple.
37
38template<typename Value, Value InvalidValue, typename Hasher, typename Equality>
40{
41public:
42 SimpleHashSet(Hasher hasher_ = {}, Equality equality_ = {})
43 : hasher(hasher_), equality(equality_)
44 {
45 }
46
47 SimpleHashSet(const SimpleHashSet&) = delete;
51
53 {
54 destructTable();
55 }
56
57 void reserve(size_t n)
58 {
59 if (n <= capacity()) return;
60 grow(std::bit_ceil(2 * n));
61 assert(capacity() >= n);
62 }
63 [[nodiscard]] size_t capacity() const
64 {
65 return (mask + 1) / 2;
66 }
67
68 [[nodiscard]] size_t size() const
69 {
70 return num_elems;
71 }
72 [[nodiscard]] bool empty() const
73 {
74 return size() == 0;
75 }
76
77 template<typename Value2>
78 bool insert(Value2&& val)
79 {
80 auto h = hash(val) & mask;
81 if (table) {
82 while (table[h] != InvalidValue) {
83 if (equal(table[h], val)) {
84 return false; // already present
85 }
86 h = (h + 1) & mask;
87 }
88 }
89
90 if (size() < capacity()) {
91 table[h] = std::forward<Value2>(val);
92 } else {
93 growAndInsert(std::forward<Value2>(val));
94 }
95 ++num_elems;
96 return true;
97 }
98
99 template<typename Value2>
100 bool erase(const Value2& val)
101 {
102 auto pos1 = locate(val);
103 if (pos1 == size_t(-1)) return false; // was not present
104
105 // Element found, now plug the hole created at position 'pos1'.
106 auto mustMove = [&](size_t p1, size_t p2, size_t p3) {
107 p2 = (p2 - p1 - 1) & mask;
108 p3 = (p3 - p1 - 1) & mask;
109 return p2 < p3;
110 };
111 auto pos2 = pos1;
112 while (true) {
113 pos2 = (pos2 + 1) & mask;
114 if (table[pos2] == InvalidValue) {
115 // Found a free position. Remove 'pos1' and done.
116 table[pos1] = InvalidValue;
117 --num_elems;
118 return true;
119 }
120 // The element at position 'pos2' actually wants to be
121 // at position 'pos3'.
122 auto pos3 = hash(table[pos2] & mask);
123 if (mustMove(pos1, pos2, pos3)) {
124 // If the desired position is (cyclically)
125 // before or at the hole, then move the element
126 // and continue with a new hole.
127 table[pos1] = std::move(table[pos2]);
128 pos1 = pos2;
129 }
130 }
131 }
132
133 template<typename Value2>
134 [[nodiscard]] Value* find(const Value2& val) const
135 {
136 auto h = locate(val);
137 return (h == size_t(-1)) ? nullptr : &table[h];
138 }
139
140 template<typename Value2>
141 [[nodiscard]] bool contains(const Value2& val) const
142 {
143 return locate(val) != size_t(-1);
144 }
145
146private:
147 template<typename Value2>
148 [[nodiscard]] size_t hash(const Value2& val) const
149 {
150 return hasher(val);
151 }
152
153 template<typename Value2>
154 [[nodiscard]] bool equal(const Value& val, const Value2& val2) const
155 {
156 return equality(val, val2);
157 }
158
159 template<typename Value2>
160 [[nodiscard]] size_t locate(const Value2& val) const
161 {
162 if (table) {
163 auto h = hash(val) & mask;
164 while (table[h] != InvalidValue) {
165 if (equal(table[h], val)) {
166 return h;
167 }
168 h = (h + 1) & mask;
169 }
170 }
171 return size_t(-1);
172 }
173
174 // precondition: 'val' is not yet present and there is enough capacity
175 template<typename Value2>
176 void insertValue(Value2&& val, Value* tab, size_t msk)
177 {
178 auto h = hash(val) & msk;
179 while (tab[h] != InvalidValue) {
180 h = (h + 1) & msk;
181 }
182 tab[h] = std::forward<Value2>(val);
183 }
184
185 void grow(size_t newSize)
186 {
187 assert(std::has_single_bit(newSize));
188 assert(newSize > (mask + 1));
189
190 auto* newTable = static_cast<Value*>(malloc(newSize * sizeof(Value)));
191 std::uninitialized_fill(newTable, newTable + newSize, InvalidValue);
192
193 size_t newMask = newSize - 1;
194 if (table) {
195 for (size_t i = 0; i <= mask; ++i) {
196 if (table[i] != InvalidValue) {
197 insertValue(std::move(table[i]), newTable, newMask);
198 }
199 }
200 destructTable();
201 }
202
203 table = newTable;
204 mask = static_cast<decltype(mask)>(newMask);
205 }
206
207 template<typename Value2>
208 void growAndInsert(Value2&& val)
209 {
210 grow(std::max<size_t>(4, 2 * (mask + 1)));
211 insertValue(std::forward<Value2>(val), table, mask);
212 }
213
214 void destructTable()
215 {
216 if (!std::is_trivially_destructible_v<Value> && table) {
217 for (size_t i = 0; i <= mask; ++i) {
218 table[i].~Value();
219 }
220 }
221 free(table);
222 }
223
224private:
225 [[no_unique_address]] Hasher hasher;
226 [[no_unique_address]] Equality equality;
227 Value* table = nullptr;
228 uint32_t mask = 0; // always one less than a power-of-2 (0, 1, 3, 7, 15, ...)
229 uint32_t num_elems = 0;
230};
231
232#endif
size_t capacity() const
size_t size() const
bool contains(const Value2 &val) const
SimpleHashSet & operator=(const SimpleHashSet &)=delete
SimpleHashSet & operator=(SimpleHashSet &&)=delete
bool insert(Value2 &&val)
Value * find(const Value2 &val) const
void reserve(size_t n)
bool erase(const Value2 &val)
bool empty() const
SimpleHashSet(Hasher hasher_={}, Equality equality_={})
SimpleHashSet(const SimpleHashSet &)=delete
SimpleHashSet(SimpleHashSet &&)=delete
mat3 p3(vec3(1, 2, 3), vec3(4, 5, 6), vec3(7, 0, 9))