openMSX
SimpleHashSet.hh
Go to the documentation of this file.
1#ifndef SIMPLEHASHSET_HH
2#define SIMPLEHASHSET_HH
3
4#include <algorithm>
5#include <bit>
6#include <cassert>
7#include <type_traits>
8
9// SimpleHashSet
10//
11// As the name implies this class is an implementation of a simple set based on
12// hashing. Compared to std::unordered_set it has these differences/limitations:
13//
14// * There must be one special value that cannot be present in the set.
15// This special value is used to indicate empty positions in the internal
16// table. Because of this, these special values should be cheap to construct.
17// * It's not a strict requirement, but the values in the set should also be
18// cheap to destruct, preferably even trivially destructible.
19// * When inserting more elements in this set, the already present elements may
20// move around in memory (e.g. because the internal table has run out of
21// capacity). In other words elements in this set do not have stable
22// addresses. (Because elements can move around, they should preferably be
23// cheap to move (same as with std::vector)).
24// * This set does not offer a fully STL-compatible interface. Though where
25// possible it does come close. For example it's not possible to iterate over
26// all elements in this set.
27//
28// The advantage of this class over std::unordered_set are (when the above
29// limitations are fine):
30// * SimpleHashSet performs better than std::unordered_set
31// * SimpleHashSet uses less memory than std::unordered_set
32// I verified this for simple types like 'uint32_t'.
33//
34// Note: some of the above limitation could be lifted, even without sacrificing
35// performance (e.g. create a conforming STL interface). Though I intentionally
36// kept this first implementation simple.
37
38template<auto InvalidValue, typename Hasher, typename Equality>
40{
41 using Value = decltype(InvalidValue);
42public:
43 SimpleHashSet(Hasher hasher_ = {}, Equality equality_ = {})
44 : hasher(hasher_), equality(equality_)
45 {
46 }
47
48 SimpleHashSet(const SimpleHashSet&) = delete;
52
54 {
55 destructTable();
56 }
57
58 void reserve(size_t n)
59 {
60 if (n <= capacity()) return;
61 grow(std::bit_ceil(2 * n));
62 assert(capacity() >= n);
63 }
64 [[nodiscard]] size_t capacity() const
65 {
66 return (mask + 1) / 2;
67 }
68
69 [[nodiscard]] size_t size() const
70 {
71 return num_elems;
72 }
73 [[nodiscard]] bool empty() const
74 {
75 return size() == 0;
76 }
77
78 template<typename Value2>
79 bool insert(Value2&& val)
80 {
81 auto h = hash(val) & mask;
82 if (table) {
83 while (table[h] != InvalidValue) {
84 if (equal(table[h], val)) {
85 return false; // already present
86 }
87 h = (h + 1) & mask;
88 }
89 }
90
91 if (size() < capacity()) {
92 table[h] = std::forward<Value2>(val);
93 } else {
94 growAndInsert(std::forward<Value2>(val));
95 }
96 ++num_elems;
97 return true;
98 }
99
100 template<typename Value2>
101 bool erase(const Value2& val)
102 {
103 auto pos1 = locate(val);
104 if (pos1 == size_t(-1)) return false; // was not present
105
106 // Element found, now plug the hole created at position 'pos1'.
107 auto mustMove = [&](size_t p1, size_t p2, size_t p3) {
108 p2 = (p2 - p1 - 1) & mask;
109 p3 = (p3 - p1 - 1) & mask;
110 return p2 < p3;
111 };
112 auto pos2 = pos1;
113 while (true) {
114 pos2 = (pos2 + 1) & mask;
115 if (table[pos2] == InvalidValue) {
116 // Found a free position. Remove 'pos1' and done.
117 table[pos1] = InvalidValue;
118 --num_elems;
119 return true;
120 }
121 // The element at position 'pos2' actually wants to be
122 // at position 'pos3'.
123 auto pos3 = hash(table[pos2] & mask);
124 if (mustMove(pos1, pos2, pos3)) {
125 // If the desired position is (cyclically)
126 // before or at the hole, then move the element
127 // and continue with a new hole.
128 table[pos1] = std::move(table[pos2]);
129 pos1 = pos2;
130 }
131 }
132 }
133
134 template<typename Value2>
135 [[nodiscard]] Value* find(const Value2& val) const
136 {
137 auto h = locate(val);
138 return (h == size_t(-1)) ? nullptr : &table[h];
139 }
140
141 template<typename Value2>
142 [[nodiscard]] bool contains(const Value2& val) const
143 {
144 return locate(val) != size_t(-1);
145 }
146
147private:
148 template<typename Value2>
149 [[nodiscard]] size_t hash(const Value2& val) const
150 {
151 return hasher(val);
152 }
153
154 template<typename Value2>
155 [[nodiscard]] bool equal(const Value& val, const Value2& val2) const
156 {
157 return equality(val, val2);
158 }
159
160 template<typename Value2>
161 [[nodiscard]] size_t locate(const Value2& val) const
162 {
163 if (table) {
164 auto h = hash(val) & mask;
165 while (table[h] != InvalidValue) {
166 if (equal(table[h], val)) {
167 return h;
168 }
169 h = (h + 1) & mask;
170 }
171 }
172 return size_t(-1);
173 }
174
175 // precondition: 'val' is not yet present and there is enough capacity
176 template<typename Value2>
177 void insertValue(Value2&& val, Value* tab, size_t msk)
178 {
179 auto h = hash(val) & msk;
180 while (tab[h] != InvalidValue) {
181 h = (h + 1) & msk;
182 }
183 tab[h] = std::forward<Value2>(val);
184 }
185
186 void grow(size_t newSize)
187 {
188 assert(std::has_single_bit(newSize));
189 assert(newSize > (mask + 1));
190
191 auto* newTable = static_cast<Value*>(malloc(newSize * sizeof(Value)));
192 std::uninitialized_fill(newTable, newTable + newSize, InvalidValue);
193
194 size_t newMask = newSize - 1;
195 if (table) {
196 for (size_t i = 0; i <= mask; ++i) {
197 if (table[i] != InvalidValue) {
198 insertValue(std::move(table[i]), newTable, newMask);
199 }
200 }
201 destructTable();
202 }
203
204 table = newTable;
205 mask = static_cast<decltype(mask)>(newMask);
206 }
207
208 template<typename Value2>
209 void growAndInsert(Value2&& val)
210 {
211 grow(std::max<size_t>(4, 2 * (mask + 1)));
212 insertValue(std::forward<Value2>(val), table, mask);
213 }
214
215 void destructTable()
216 {
217 if (!std::is_trivially_destructible_v<Value> && table) {
218 for (size_t i = 0; i <= mask; ++i) {
219 table[i].~Value();
220 }
221 }
222 free(table);
223 }
224
225private:
226 [[no_unique_address]] Hasher hasher;
227 [[no_unique_address]] Equality equality;
228 Value* table = nullptr;
229 uint32_t mask = 0; // always one less than a power-of-2 (0, 1, 3, 7, 15, ...)
230 uint32_t num_elems = 0;
231};
232
233#endif
SimpleHashSet & operator=(SimpleHashSet &&)=delete
Value * find(const Value2 &val) const
size_t capacity() const
size_t size() const
SimpleHashSet & operator=(const SimpleHashSet &)=delete
bool insert(Value2 &&val)
SimpleHashSet(const SimpleHashSet &)=delete
bool erase(const Value2 &val)
bool empty() const
SimpleHashSet(SimpleHashSet &&)=delete
bool contains(const Value2 &val) const
SimpleHashSet(Hasher hasher_={}, Equality equality_={})
void reserve(size_t n)
mat3 p3(vec3(1, 2, 3), vec3(4, 5, 6), vec3(7, 0, 9))