openMSX
SimpleHashSet.hh
Go to the documentation of this file.
1 #ifndef SIMPLEHASHSET_HH
2 #define SIMPLEHASHSET_HH
3 
4 #include <algorithm>
5 #include <bit>
6 #include <cassert>
7 #include <type_traits>
8 
9 // SimpleHashSet
10 //
11 // As the name implies this class is an implementation of a simple set based on
12 // hashing. Compared to std::unordered_set it has these differences/limitations:
13 //
14 // * There must be one special value that cannot be present in the set.
15 // This special value is used to indicate empty positions in the internal
16 // table. Because of this, these special values should be cheap to construct.
17 // * It's not a strict requirement, but the values in the set should also be
18 // cheap to destruct, preferably even trivially destructible.
19 // * When inserting more elements in this set, the already present elements may
20 // move around in memory (e.g. because the internal table has run out of
21 // capacity). In other words elements in this set do not have stable
22 // addresses. (Because elemnts can move around, they should preferably be
23 // cheap to move (same as with std::vector)).
24 // * This set does not offer a fully STL-compatible interface. Though where
25 // possible it does come close. For example it's not possible to iterate over
26 // all elements in this set.
27 //
28 // The advantage of this class over std::unordered_set are (when the above
29 // limitations are fine):
30 // * SimpleHashSet performs better than std::unordered_set
31 // * SimpleHashSet uses less memory than std::unordered_set
32 // I verified this for simple types like 'uint32_t'.
33 //
34 // Note: some of the above limitation could be lifted, even without sacrificing
35 // performance (e.g. create a conforming STL interface). Though I intentionally
36 // kept this first implementation simple.
37 
38 template<typename Value, Value InvalidValue, typename Hasher, typename Equality>
40 {
41 public:
42  SimpleHashSet(Hasher hasher_ = {}, Equality equality_ = {})
43  : hasher(hasher_), equality(equality_)
44  {
45  }
46 
47  SimpleHashSet(const SimpleHashSet&) = delete;
51 
53  {
54  destructTable();
55  }
56 
57  void reserve(size_t n)
58  {
59  if (n <= capacity()) return;
60  grow(std::bit_ceil(2 * n));
61  assert(capacity() >= n);
62  }
63  [[nodiscard]] size_t capacity() const
64  {
65  return (mask + 1) / 2;
66  }
67 
68  [[nodiscard]] size_t size() const
69  {
70  return num_elems;
71  }
72  [[nodiscard]] bool empty() const
73  {
74  return size() == 0;
75  }
76 
77  template<typename Value2>
78  bool insert(Value2&& val)
79  {
80  auto h = hash(val) & mask;
81  if (table) {
82  while (table[h] != InvalidValue) {
83  if (equal(table[h], val)) {
84  return false; // already present
85  }
86  h = (h + 1) & mask;
87  }
88  }
89 
90  if (size() < capacity()) {
91  table[h] = std::forward<Value2>(val);
92  } else {
93  growAndInsert(std::forward<Value2>(val));
94  }
95  ++num_elems;
96  return true;
97  }
98 
99  template<typename Value2>
100  bool erase(const Value2& val)
101  {
102  auto pos1 = locate(val);
103  if (pos1 == size_t(-1)) return false; // was not present
104 
105  // Element found, now plug the hole created at position 'pos1'.
106  auto mustMove = [&](size_t p1, size_t p2, size_t p3) {
107  p2 = (p2 - p1 - 1) & mask;
108  p3 = (p3 - p1 - 1) & mask;
109  return p2 < p3;
110  };
111  auto pos2 = pos1;
112  while (true) {
113  pos2 = (pos2 + 1) & mask;
114  if (table[pos2] == InvalidValue) {
115  // Found a free position. Remove 'pos1' and done.
116  table[pos1] = InvalidValue;
117  --num_elems;
118  return true;
119  }
120  // The element at position 'pos2' actually wants to be
121  // at position 'pos3'.
122  auto pos3 = hash(table[pos2] & mask);
123  if (mustMove(pos1, pos2, pos3)) {
124  // If the desired position is (cyclically)
125  // before or at the hole, then move the element
126  // and continue with a new hole.
127  table[pos1] = std::move(table[pos2]);
128  pos1 = pos2;
129  }
130  }
131  }
132 
133  template<typename Value2>
134  [[nodiscard]] Value* find(const Value2& val) const
135  {
136  auto h = locate(val);
137  return (h == size_t(-1)) ? nullptr : &table[h];
138  }
139 
140  template<typename Value2>
141  [[nodiscard]] bool contains(const Value2& val) const
142  {
143  return locate(val) != size_t(-1);
144  }
145 
146 private:
147  template<typename Value2>
148  [[nodiscard]] size_t hash(const Value2& val) const
149  {
150  return hasher(val);
151  }
152 
153  template<typename Value2>
154  [[nodiscard]] bool equal(const Value& val, const Value2& val2) const
155  {
156  return equality(val, val2);
157  }
158 
159  template<typename Value2>
160  [[nodiscard]] size_t locate(const Value2& val) const
161  {
162  if (table) {
163  auto h = hash(val) & mask;
164  while (table[h] != InvalidValue) {
165  if (equal(table[h], val)) {
166  return h;
167  }
168  h = (h + 1) & mask;
169  }
170  }
171  return size_t(-1);
172  }
173 
174  // precondition: 'val' is not yet present and there is enough capacity
175  template<typename Value2>
176  void insertValue(Value2&& val, Value* tab, size_t msk)
177  {
178  auto h = hash(val) & msk;
179  while (tab[h] != InvalidValue) {
180  h = (h + 1) & msk;
181  }
182  tab[h] = std::forward<Value2>(val);
183  }
184 
185  void grow(size_t newSize)
186  {
187  assert(std::has_single_bit(newSize));
188  assert(newSize > (mask + 1));
189 
190  auto* newTable = static_cast<Value*>(malloc(newSize * sizeof(Value)));
191  std::uninitialized_fill(newTable, newTable + newSize, InvalidValue);
192 
193  size_t newMask = newSize - 1;
194  if (table) {
195  for (size_t i = 0; i <= mask; ++i) {
196  if (table[i] != InvalidValue) {
197  insertValue(std::move(table[i]), newTable, newMask);
198  }
199  }
200  destructTable();
201  }
202 
203  table = newTable;
204  mask = static_cast<decltype(mask)>(newMask);
205  }
206 
207  template<typename Value2>
208  void growAndInsert(Value2&& val)
209  {
210  grow(std::max<size_t>(4, 2 * (mask + 1)));
211  insertValue(std::forward<Value2>(val), table, mask);
212  }
213 
214  void destructTable()
215  {
216  if (!std::is_trivially_destructible_v<Value> && table) {
217  for (size_t i = 0; i <= mask; ++i) {
218  table[i].~Value();
219  }
220  }
221  free(table);
222  }
223 
224 private:
225  [[no_unique_address]] Hasher hasher;
226  [[no_unique_address]] Equality equality;
227  Value* table = nullptr;
228  uint32_t mask = 0; // always one less than a power-of-2 (0, 1, 3, 7, 15, ...)
229  uint32_t num_elems = 0;
230 };
231 
232 #endif
size_t capacity() const
Value * find(const Value2 &val) const
SimpleHashSet & operator=(SimpleHashSet &&)=delete
size_t size() const
bool contains(const Value2 &val) const
SimpleHashSet & operator=(const SimpleHashSet &)=delete
bool insert(Value2 &&val)
void reserve(size_t n)
bool erase(const Value2 &val)
bool empty() const
SimpleHashSet(Hasher hasher_={}, Equality equality_={})
SimpleHashSet(const SimpleHashSet &)=delete
SimpleHashSet(SimpleHashSet &&)=delete
mat3 p3(vec3(1, 2, 3), vec3(4, 5, 6), vec3(7, 0, 9))