openMSX
StringMap.hh
Go to the documentation of this file.
1 #ifndef STRINGMAP_HH
2 #define STRINGMAP_HH
3 
4 // This is based on the StringMap class in LLVM:
5 // http://llvm.org/docs/ProgrammersManual.html#dss_stringmap
6 // See also that page for a more detailed descrption.
7 //
8 // Very briefly: the class StringMap<T> offers a hashtable-based map. The keys
9 // are strings (or any byte-array with variable length) and the value can be
10 // any type. Both keys and values are stored in (=owned by) the map.
11 //
12 // The interface of the StringMap<T> class is very close to the STL
13 // std::map<std::string, T> class. Though one notable difference is the use of
14 // 'first'. An example:
15 //
16 // using MySTLmap = std::map<std::string, int>;
17 // MySTLmap m1 = ...;
18 // MySTLmap::const_iterator it1 = m1.find("abc"); // *1*
19 // assert(it1->first == "abc");
20 // int i1 = it1->second;
21 //
22 // using MyHashmap = StringMap<int>;
23 // MyHashmap m2 = ...;
24 // MyHashmap::const_iterator it = m2.find("abc"); // *2
25 // assert(it2->first() == "abc");
26 // int i2 = it2->second;
27 //
28 // So basically you only have to replace 'first' with 'first()'. Note that on
29 // the line marked with *1* a temporary std::string object needs to be
30 // constructed and destructed. The corresponding line *2* has no such overhead.
31 
32 
33 #include "string_ref.hh"
34 #include "likely.hh"
35 #include <cassert>
36 #include <cstring> // memcpy
37 #include <cstdlib> // malloc, free
38 #include <new> // bad_alloc
39 
40 template<typename T> class StringMapConstIterator;
41 template<typename T> class StringMapIterator;
42 
43 // Non-templatized base class of StringMapEntry<T>.
45 {
46 public:
47  explicit StringMapEntryBase(unsigned len) : strLen(len) {}
48  unsigned getKeyLength() const { return strLen; }
49 private:
50  unsigned strLen;
51 };
52 
53 
56 {
57 public:
59  return reinterpret_cast<StringMapEntryBase*>(-1);
60  }
61 
62  unsigned getNumBuckets() const { return numBuckets; }
63  bool empty() const { return numItems == 0; }
64  unsigned size() const { return numItems; }
65 
66 protected:
67  StringMapImpl(unsigned itemSize, unsigned initSize);
68  ~StringMapImpl() { free(theTable); }
69 
70  // Grow the table, redistributing values into the buckets with the
71  // appropriate mod-of-hashtable-size.
72  void rehashTable();
73 
74  void init(unsigned size);
75  unsigned* getHashTable() const {
76  return reinterpret_cast<unsigned*>(theTable + numBuckets + 1);
77  }
78 
79  // Array of numBuckets pointers to entries, nullptrs are holes.
80  // theTable[numBuckets] contains a sentinel value for easy iteration.
81  // Followed by an array of the actual hash values as unsigned integers.
83  unsigned numBuckets;
84  unsigned numItems;
85  unsigned numTombstones;
86  const unsigned itemSize;
87 };
88 
89 template<bool CASE_SENSITIVE> class StringMapImpl2
90  : public StringMapImpl
91 {
92 protected:
93  StringMapImpl2(unsigned itemSize, unsigned initSize);
94 
95  // Look up the bucket that the specified string should end up in. If it
96  // already exists as a key in the map, the Item pointer for the
97  // specified bucket will be non-null. Otherwise, it will be null. In
98  // either case, the FullHashValue field of the bucket will be set to
99  // the hash value of the string.
100  unsigned lookupBucketFor(string_ref key);
101 
102  // Look up the bucket that contains the specified key. If it exists in
103  // the map, return the bucket number of the key. Otherwise return -1.
104  // This does not modify the map.
105  int findKey(string_ref key) const;
106 
107  // Remove the specified StringMapEntry from the table, but do not
108  // delete it. This aborts if the value isn't in the table.
109  void removeKey(StringMapEntryBase *V);
110 
111  // Remove the StringMapEntry for the specified key from the table,
112  // returning it. If the key is not in the table, this returns null.
114 };
115 
116 
117 // This is used to represent one value that is inserted into a StringMap.
118 // It contains the value itself and the key (string-length + string-data).
119 template<typename T> class StringMapEntry : public StringMapEntryBase
120 {
121 public:
123 
124 public:
125  // Create a StringMapEntry for the specified key and value.
126  static StringMapEntry* create(string_ref key, T v)
127  {
128  // Allocate memory.
129  auto newItem = static_cast<StringMapEntry*>(
130  malloc(sizeof(StringMapEntry) + key.size()));
131  if (unlikely(!newItem)) {
132  throw std::bad_alloc();
133  }
134 
135  // Construct the value (using placement new).
136  new (newItem) StringMapEntry(unsigned(key.size()), std::move(v));
137 
138  // Copy the string data.
139  auto strBuffer = const_cast<char*>(newItem->getKeyData());
140  memcpy(strBuffer, key.data(), key.size());
141 
142  return newItem;
143  }
144 
145  // Destroy this StringMapEntry, releasing memory.
146  void destroy() {
147  this->~StringMapEntry();
148  free(this);
149  }
150 
151  string_ref getKey() const {
152  return string_ref(getKeyData(), getKeyLength());
153  }
154  string_ref first() const { return getKey(); }
155 
156  const T& getValue() const { return second; }
157  T& getValue() { return second; }
158  void setValue(const T& v) { second = v; }
159 
160  // Return the start of the string data that is the key for this value.
161  const char* getKeyData() const {
162  // The string data is stored immediately after this object.
163  return reinterpret_cast<const char*>(this + 1);
164  }
165 
166  // Given a value that is known to be embedded into a StringMapEntry,
167  // return the StringMapEntry itself.
169  StringMapEntry* ePtr = 0;
170  auto ptr = reinterpret_cast<char*>(&v) -
171  (reinterpret_cast<char*>(&ePtr->second) -
172  reinterpret_cast<char*>(ePtr));
173  return *reinterpret_cast<StringMapEntry*>(ptr);
174  }
175  static const StringMapEntry& GetStringMapEntryFromValue(const T &v) {
176  return GetStringMapEntryFromValue(const_cast<T&>(v));
177  }
178 
179  // Given key data that is known to be embedded into a StringMapEntry,
180  // return the StringMapEntry itself.
181  static StringMapEntry& GetStringMapEntryFromKeyData(const char* keyData) {
182  auto ptr = const_cast<char*>(keyData) - sizeof(StringMapEntry<T>);
183  return *reinterpret_cast<StringMapEntry*>(ptr);
184  }
185 
186 private:
187  StringMapEntry(unsigned strLen, T v)
188  : StringMapEntryBase(strLen), second(std::move(v)) {}
189 
190  ~StringMapEntry() {}
191 };
192 
193 
194 // This is an unconventional map that is specialized for handling keys that are
195 // "strings", which are basically ranges of bytes. This does some funky memory
196 // allocation and hashing things to make it extremely efficient, storing the
197 // string data *after* the value in the map.
198 template<typename T, bool CASE_SENSITIVE = true>
199 class StringMap : public StringMapImpl2<CASE_SENSITIVE>
200 {
201 public:
202  using key_type = const char*;
203  using mapped_type = T;
205  using size_type = size_t;
208 
209  explicit StringMap(unsigned initialSize = 0)
210  : StringMapImpl2<CASE_SENSITIVE>(sizeof(value_type), initialSize)
211  {}
212 
214  clear();
215  }
216 
218  return iterator(this->theTable, this->numBuckets != 0);
219  }
221  return const_iterator(this->theTable, this->numBuckets != 0);
222  }
224  return iterator(this->theTable + this->numBuckets);
225  }
226  const_iterator end() const {
227  return const_iterator(this->theTable + this->numBuckets);
228  }
229 
231  int bucket = this->findKey(key);
232  if (bucket == -1) return end();
233  return iterator(this->theTable + bucket);
234  }
236  int bucket = this->findKey(key);
237  if (bucket == -1) return end();
238  return const_iterator(this->theTable + bucket);
239  }
240 
241  // Return the entry for the specified key, or a default constructed
242  // value if no such entry exists.
243  T lookup(string_ref key) const {
244  const_iterator it = find(key);
245  if (it != end()) return it->second;
246  return T();
247  }
248 
250  return getOrCreateValue(key).second;
251  }
252 
254  return (find(key) == end()) ? 0 : 1;
255  }
256 
257  // Insert the specified key/value pair into the map. If the key already
258  // exists in the map, return false and ignore the request, otherwise
259  // insert it and return true.
260  bool insert(value_type* keyValue)
261  {
262  unsigned bucketNo = this->lookupBucketFor(keyValue->getKey());
263  StringMapEntryBase*& bucket = this->theTable[bucketNo];
264  if (bucket && (bucket != this->getTombstoneVal())) {
265  return false; // Already exists in map.
266  }
267  if (bucket == this->getTombstoneVal()) {
268  --this->numTombstones;
269  }
270  bucket = keyValue;
271  ++this->numItems;
272  assert(this->numItems + this->numTombstones <= this->numBuckets);
273 
274  this->rehashTable();
275  return true;
276  }
277 
278  // Empties out the StringMap
279  void clear()
280  {
281  if (this->empty()) return;
282 
283  // Zap all values, resetting the keys back to non-present (not
284  // tombstone), which is safe because we're removing all
285  // elements.
286  for (unsigned i = 0; i != this->numBuckets; ++i) {
287  StringMapEntryBase*& bucket = this->theTable[i];
288  if (bucket && (bucket != this->getTombstoneVal())) {
289  static_cast<value_type*>(bucket)->destroy();
290  }
291  bucket = nullptr;
292  }
293 
294  this->numItems = 0;
295  this->numTombstones = 0;
296  }
297 
298  // Look up the specified key in the table. If a value exists, return
299  // it. Otherwise, default construct a value, insert it, and return.
301  {
302  unsigned bucketNo = this->lookupBucketFor(key);
303  StringMapEntryBase*& bucket = this->theTable[bucketNo];
304  if (bucket && (bucket != this->getTombstoneVal())) {
305  return *static_cast<value_type*>(bucket);
306  }
307 
308  value_type* newItem = value_type::create(key, std::move(val));
309 
310  if (bucket == this->getTombstoneVal()) --this->numTombstones;
311  ++this->numItems;
312  assert(this->numItems + this->numTombstones <= this->numBuckets);
313 
314  // Fill in the bucket for the hash table. The FullHashValue was already
315  // filled in by lookupBucketFor().
316  bucket = newItem;
317 
318  this->rehashTable();
319  return *newItem;
320  }
321 
322  // Remove the specified key/value pair from the map, but do not destroy
323  // it. This aborts if the key is not in the map.
324  void remove(value_type* keyValue) {
325  this->removeKey(keyValue);
326  }
327 
328  void erase(iterator i) {
329  value_type& v = *i;
330  remove(&v);
331  v.destroy();
332  }
333 
334  bool erase(string_ref key) {
335  iterator i = find(key);
336  if (i == end()) return false;
337  erase(i);
338  return true;
339  }
340 
341 private:
342  // disable copy and assign
343  StringMap(const StringMap&);
344  StringMap& operator=(const StringMap&);
345 };
346 
347 
348 template<typename T> class StringMapConstIterator
349 {
350 public:
352 
354  StringMapEntryBase** bucket, bool advance = false)
355  : ptr(bucket)
356  {
357  if (advance) advancePastEmptyBuckets();
358  }
359 
360  const value_type& operator*() const {
361  return *static_cast<value_type*>(*ptr);
362  }
363  const value_type* operator->() const {
364  return static_cast<value_type*>(*ptr);
365  }
366 
367  bool operator==(const StringMapConstIterator& rhs) const {
368  return ptr == rhs.ptr;
369  }
370  bool operator!=(const StringMapConstIterator& rhs) const {
371  return ptr != rhs.ptr;
372  }
373 
374  inline StringMapConstIterator& operator++() { // preincrement
375  ++ptr;
376  advancePastEmptyBuckets();
377  return *this;
378  }
379  StringMapConstIterator operator++(int) { // postincrement
380  StringMapConstIterator tmp = *this;
381  ++*this;
382  return tmp;
383  }
384 
385 protected:
387 
388 private:
389  void advancePastEmptyBuckets() {
390  while ((*ptr == nullptr) || (*ptr == StringMapImpl::getTombstoneVal())) {
391  ++ptr;
392  }
393  }
394 };
395 
396 template<typename T> class StringMapIterator : public StringMapConstIterator<T>
397 {
398 public:
400 
402  StringMapEntryBase** bucket, bool advance = false)
403  : StringMapConstIterator<T>(bucket, advance)
404  {
405  }
406 
408  return *static_cast<value_type*>(*this->ptr);
409  }
411  return static_cast<value_type*>(*this->ptr);
412  }
413 };
414 
415 // begin, end
416 template<typename T, bool C>
417 inline typename StringMap<T, C>::iterator begin(StringMap<T, C>& m) { return m.begin(); }
418 template<typename T, bool C>
419 inline typename StringMap<T, C>::const_iterator begin(const StringMap<T, C>& m) { return m.begin(); }
420 template<typename T, bool C>
421 inline typename StringMap<T, C>::iterator end (StringMap<T, C>& m) { return m.end(); }
422 template<typename T, bool C>
423 inline typename StringMap<T, C>::const_iterator end (const StringMap<T, C>& m) { return m.end(); }
424 
425 #endif
StringMapEntryBase ** ptr
Definition: StringMap.hh:386
static StringMapEntry * create(string_ref key, T v)
Definition: StringMap.hh:126
const char * getKeyData() const
Definition: StringMap.hh:161
T lookup(string_ref key) const
Definition: StringMap.hh:243
std::unique_ptr< openmsx::PolymorphicLoaderBase< Archive > > mapped_type
Definition: StringMap.hh:203
iterator end()
Definition: StringMap.hh:223
string_ref getKey() const
Definition: StringMap.hh:151
StringMap< T, C >::iterator begin(StringMap< T, C > &m)
Definition: StringMap.hh:417
const value_type * operator->() const
Definition: StringMap.hh:363
StringMap(unsigned initialSize=0)
Definition: StringMap.hh:209
#define unlikely(x)
Definition: likely.hh:15
unsigned numBuckets
Definition: StringMap.hh:83
const unsigned itemSize
Definition: StringMap.hh:86
unsigned * getHashTable() const
Definition: StringMap.hh:75
void init(unsigned size)
Definition: StringMap.cc:20
STL namespace.
const_iterator find(string_ref key) const
Definition: StringMap.hh:235
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
unsigned size() const
Definition: StringMap.hh:64
StringMapIterator< T > iterator
Definition: StringMap.hh:207
bool operator==(const StringMapConstIterator &rhs) const
Definition: StringMap.hh:367
StringMapConstIterator operator++(int)
Definition: StringMap.hh:379
StringMapImpl(unsigned itemSize, unsigned initSize)
Definition: StringMap.cc:5
bool empty() const
Definition: StringMap.hh:63
StringMap< T, C >::iterator end(StringMap< T, C > &m)
Definition: StringMap.hh:421
static StringMapEntryBase * getTombstoneVal()
Definition: StringMap.hh:58
void clear()
Definition: StringMap.hh:279
size_type size() const
Definition: string_ref.hh:55
const char * data() const
Definition: string_ref.hh:68
void advance(octet_iterator &it, distance_type n, octet_iterator end)
bool insert(value_type *keyValue)
Definition: StringMap.hh:260
value_type * operator->() const
Definition: StringMap.hh:410
const T & getValue() const
Definition: StringMap.hh:156
iterator begin()
Definition: StringMap.hh:217
unsigned lookupBucketFor(string_ref key)
Definition: StringMap.cc:119
StringMapEntryBase(unsigned len)
Definition: StringMap.hh:47
Non-templatized base class of StringMap.
Definition: StringMap.hh:55
const_iterator end() const
Definition: StringMap.hh:226
value_type & getOrCreateValue(string_ref key, T val=T())
Definition: StringMap.hh:300
const value_type & operator*() const
Definition: StringMap.hh:360
static StringMapEntry & GetStringMapEntryFromValue(T &v)
Definition: StringMap.hh:168
StringMapImpl2(unsigned itemSize, unsigned initSize)
Definition: StringMap.cc:113
static const StringMapEntry & GetStringMapEntryFromValue(const T &v)
Definition: StringMap.hh:175
string_ref first() const
Definition: StringMap.hh:154
static StringMapEntry & GetStringMapEntryFromKeyData(const char *keyData)
Definition: StringMap.hh:181
StringMapConstIterator(StringMapEntryBase **bucket, bool advance=false)
Definition: StringMap.hh:353
void rehashTable()
Definition: StringMap.cc:40
T & operator[](string_ref key)
Definition: StringMap.hh:249
unsigned getKeyLength() const
Definition: StringMap.hh:48
unsigned numTombstones
Definition: StringMap.hh:85
unsigned numItems
Definition: StringMap.hh:84
void erase(iterator i)
Definition: StringMap.hh:328
int findKey(string_ref key) const
Definition: StringMap.cc:166
size_type count(string_ref key) const
Definition: StringMap.hh:253
const_iterator begin() const
Definition: StringMap.hh:220
void setValue(const T &v)
Definition: StringMap.hh:158
iterator find(string_ref key)
Definition: StringMap.hh:230
StringMapIterator(StringMapEntryBase **bucket, bool advance=false)
Definition: StringMap.hh:401
void removeKey(StringMapEntryBase *V)
Definition: StringMap.cc:197
bool erase(string_ref key)
Definition: StringMap.hh:334
StringMapEntryBase ** theTable
Definition: StringMap.hh:82
value_type & operator*() const
Definition: StringMap.hh:407
bool operator!=(const StringMapConstIterator &rhs) const
Definition: StringMap.hh:370
StringMapConstIterator< T > const_iterator
Definition: StringMap.hh:206
unsigned getNumBuckets() const
Definition: StringMap.hh:62
StringMapConstIterator & operator++()
Definition: StringMap.hh:374
void destroy()
Definition: StringMap.hh:146