openMSX
SimpleHashSet.hh
Go to the documentation of this file.
1 #ifndef SIMPLEHASHSET_HH
2 #define SIMPLEHASHSET_HH
3 
4 #include "Math.hh"
5 #include <algorithm>
6 #include <cassert>
7 #include <type_traits>
8 
9 #if __has_cpp_attribute(no_unique_address)
10 #define NO_UNIQUE_ADDRESS [[no_unique_address]]
11 #else
12 #define NO_UNIQUE_ADDRESS
13 #endif
14 
15 // SimpleHashSet
16 //
17 // As the name implies this class is an implementation of a simple set based on
18 // hashing. Compared to std::unordered_set it has these differences/limitations:
19 //
20 // * There must be one special value that cannot be present in the set.
21 // This special value is used to indicate empty positions in the internal
22 // table. Because of this, these special values should be cheap to construct.
23 // * It's not a strict requirement, but the values in the set should also be
24 // cheap to destruct, preferably even trivially destructible.
25 // * When inserting more elements in this set, the already present elements may
26 // move around in memory (e.g. because the internal table has run out of
27 // capacity). In other words elements in this set do not have stable
28 // addresses. (Because elemnts can move around, they should preferably be
29 // cheap to move (same as with std::vector)).
30 // * This set does not offer a fully STL-compatible interface. Though where
31 // possible it does come close. For example it's not possible to iterate over
32 // all elements in this set.
33 //
34 // The advantage of this class over std::unordered_set are (when the above
35 // limitations are fine):
36 // * SimpleHashSet performs better than std::unordered_set
37 // * SimpleHashSet uses less memory than std::unordered_set
38 // I verified this for simple types like 'uint32_t'.
39 //
40 // Note: some of the above limitation could be lifted, even without sacrificing
41 // performance (e.g. create a conforming STL interface). Though I intentionally
42 // kept this first implementation simple.
43 
44 template<typename Value, Value InvalidValue, typename Hasher, typename Equality>
46 {
47 public:
48  SimpleHashSet(Hasher hasher_ = {}, Equality equality_ = {})
49  : hasher(hasher_), equality(equality_)
50  {
51  }
52 
53  SimpleHashSet(const SimpleHashSet&) = delete;
57 
59  {
60  destructTable();
61  }
62 
63  void reserve(size_t n)
64  {
65  if (n <= capacity()) return;
66  grow(Math::ceil2(2 * n));
67  assert(capacity() >= n);
68  }
69  size_t capacity() const
70  {
71  return (mask + 1) / 2;
72  }
73 
74  size_t size() const
75  {
76  return num_elems;
77  }
78  bool empty() const
79  {
80  return size() == 0;
81  }
82 
83  template<typename Value2>
84  bool insert(Value2&& val)
85  {
86  auto h = hash(val) & mask;
87  if (table) {
88  while (table[h] != InvalidValue) {
89  if (equal(table[h], val)) {
90  return false; // already present
91  }
92  h = (h + 1) & mask;
93  }
94  }
95 
96  if (size() < capacity()) {
97  table[h] = std::forward<Value2>(val);
98  } else {
99  growAndInsert(std::forward<Value2>(val));
100  }
101  ++num_elems;
102  return true;
103  }
104 
105  template<typename Value2>
106  bool erase(const Value2& val)
107  {
108  auto pos1 = locate(val);
109  if (pos1 == size_t(-1)) return false; // was not presnt
110 
111  // Element found, now plug the hole created at position 'pos1'.
112  auto mustMove = [&](size_t p1, size_t p2, size_t p3) {
113  p2 = (p2 - p1 - 1) & mask;
114  p3 = (p3 - p1 - 1) & mask;
115  return p2 < p3;
116  };
117  auto pos2 = pos1;
118  while (true) {
119  pos2 = (pos2 + 1) & mask;
120  if (table[pos2] == InvalidValue) {
121  // Found a free position. Remove 'pos1' and done.
122  table[pos1] = InvalidValue;
123  --num_elems;
124  return true;
125  }
126  // The element at position 'pos2' actually wants to be
127  // at position 'pos3'.
128  auto pos3 = hash(table[pos2] & mask);
129  if (mustMove(pos1, pos2, pos3)) {
130  // If the desired position is (cyclically)
131  // before or at the hole, then move the element
132  // and continue with a new hole.
133  table[pos1] = std::move(table[pos2]);
134  pos1 = pos2;
135  }
136  }
137  }
138 
139  template<typename Value2>
140  Value* find(const Value2& val) const
141  {
142  auto h = locate(val);
143  return (h == size_t(-1)) ? nullptr : &table[h];
144  }
145 
146  template<typename Value2>
147  bool contains(const Value2& val) const
148  {
149  return locate(val) != size_t(-1);
150  }
151 
152 private:
153  template<typename Value2>
154  size_t hash(const Value2& val) const
155  {
156  return hasher(val);
157  }
158 
159  template<typename Value2>
160  bool equal(const Value& val, const Value2& val2) const
161  {
162  return equality(val, val2);
163  }
164 
165  template<typename Value2>
166  size_t locate(const Value2& val) const
167  {
168  if (table) {
169  auto h = hash(val) & mask;
170  while (table[h] != InvalidValue) {
171  if (equal(table[h], val)) {
172  return h;
173  }
174  h = (h + 1) & mask;
175  }
176  }
177  return size_t(-1);
178  }
179 
180  // precondition: 'val' is not yet present and there is enough capacity
181  template<typename Value2>
182  void insertValue(Value2&& val, Value* tab, size_t msk)
183  {
184  auto h = hash(val) & msk;
185  while (tab[h] != InvalidValue) {
186  h = (h + 1) & msk;
187  }
188  tab[h] = std::forward<Value2>(val);
189  }
190 
191  void grow(size_t newSize)
192  {
193  assert(Math::ispow2(newSize));
194  assert(newSize > (mask + 1));
195 
196  auto* newTable = static_cast<Value*>(malloc(newSize * sizeof(Value)));
197  std::uninitialized_fill(newTable, newTable + newSize, InvalidValue);
198 
199  size_t newMask = newSize - 1;
200  if (table) {
201  for (size_t i = 0; i <= mask; ++i) {
202  if (table[i] != InvalidValue) {
203  insertValue(std::move(table[i]), newTable, newMask);
204  }
205  }
206  destructTable();
207  }
208 
209  table = newTable;
210  mask = newMask;
211  }
212 
213  template<typename Value2>
214  void growAndInsert(Value2&& val)
215  {
216  grow(std::max<size_t>(4, 2 * (mask + 1)));
217  insertValue(std::forward<Value2>(val), table, mask);
218  }
219 
220  void destructTable()
221  {
222  if (!std::is_trivially_destructible_v<Value> && table) {
223  for (size_t i = 0; i <= mask; ++i) {
224  table[i].~Value();
225  }
226  }
227  free(table);
228  }
229 
230 private:
231  NO_UNIQUE_ADDRESS Hasher hasher;
232  NO_UNIQUE_ADDRESS Equality equality;
233  Value* table = nullptr;
234  uint32_t mask = 0; // always one less than a power-of-2 (0, 1, 3, 7, 15, ...)
235  uint32_t num_elems = 0;
236 };
237 
238 #endif
SimpleHashSet::operator=
SimpleHashSet & operator=(SimpleHashSet &&)=delete
p3
mat3 p3(vec3(1, 2, 3), vec3(4, 5, 6), vec3(7, 0, 9))
SimpleHashSet::reserve
void reserve(size_t n)
Definition: SimpleHashSet.hh:63
SimpleHashSet
Definition: SimpleHashSet.hh:46
SimpleHashSet::size
size_t size() const
Definition: SimpleHashSet.hh:74
Math::ispow2
constexpr bool ispow2(T x) noexcept
Is the given number an integral power of two? That is, does it have exactly one 1-bit in binary repre...
Definition: Math.hh:57
SimpleHashSet::capacity
size_t capacity() const
Definition: SimpleHashSet.hh:69
SimpleHashSet::insert
bool insert(Value2 &&val)
Definition: SimpleHashSet.hh:84
SimpleHashSet::SimpleHashSet
SimpleHashSet(SimpleHashSet &&)=delete
SimpleHashSet::find
Value * find(const Value2 &val) const
Definition: SimpleHashSet.hh:140
SimpleHashSet::empty
bool empty() const
Definition: SimpleHashSet.hh:78
NO_UNIQUE_ADDRESS
#define NO_UNIQUE_ADDRESS
Definition: SimpleHashSet.hh:12
SimpleHashSet::erase
bool erase(const Value2 &val)
Definition: SimpleHashSet.hh:106
SimpleHashSet::operator=
SimpleHashSet & operator=(const SimpleHashSet &)=delete
SimpleHashSet::contains
bool contains(const Value2 &val) const
Definition: SimpleHashSet.hh:147
Math.hh
SimpleHashSet::~SimpleHashSet
~SimpleHashSet()
Definition: SimpleHashSet.hh:58
Math::ceil2
constexpr T ceil2(T x) noexcept
Returns the smallest number that is both >=a and a power of two.
Definition: Math.hh:85
SimpleHashSet::SimpleHashSet
SimpleHashSet(Hasher hasher_={}, Equality equality_={})
Definition: SimpleHashSet.hh:48
SimpleHashSet::SimpleHashSet
SimpleHashSet(const SimpleHashSet &)=delete