openMSX
StringMap.cc
Go to the documentation of this file.
1 #include "StringMap.hh"
2 #include "StringOp.hh"
3 #include "xxhash.hh"
4 
5 StringMapImpl::StringMapImpl(unsigned itemSize_, unsigned initSize)
6  : itemSize(itemSize_)
7 {
8  if (initSize) {
9  // If a size is specified, initialize the table with that many buckets.
10  init(initSize);
11  } else {
12  // Otherwise, initialize it with zero buckets to avoid the allocation.
13  theTable = nullptr;
14  numBuckets = 0;
15  numItems = 0;
16  numTombstones = 0;
17  }
18 }
19 
20 void StringMapImpl::init(unsigned initSize)
21 {
22  assert(((initSize & (initSize - 1)) == 0) &&
23  "Init Size must be a power of 2 or zero!");
24  numBuckets = initSize;
25  numItems = 0;
26  numTombstones = 0;
27 
28  theTable = static_cast<StringMapEntryBase**>(calloc(
29  numBuckets + 1,
30  sizeof(StringMapEntryBase**) + sizeof(unsigned)));
31  if (unlikely(!theTable)) {
32  throw std::bad_alloc();
33  }
34 
35  // Allocate one extra bucket, set it to look filled so the iterators
36  // stop at end.
37  theTable[numBuckets] = reinterpret_cast<StringMapEntryBase*>(2);
38 }
39 
41 {
42  // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
43  // the buckets are empty (meaning that many are filled with tombstones),
44  // grow/rehash the table.
45  unsigned newSize;
46  if ((numItems * 4) > (numBuckets * 3)) {
47  newSize = numBuckets * 2; // double size
48  } else if (numBuckets - (numItems + numTombstones) < (numBuckets / 8)) {
49  newSize = numBuckets; // same size, only clear tombstones
50  } else {
51  return;
52  }
53 
54  // Allocate one extra bucket (see init()).
55  auto newTableArray = static_cast<StringMapEntryBase**>(
56  calloc(newSize + 1,
57  sizeof(StringMapEntryBase*) + sizeof(unsigned)));
58  if (unlikely(!newTableArray)) {
59  throw std::bad_alloc();
60  }
61  newTableArray[newSize] = reinterpret_cast<StringMapEntryBase*>(2);
62  auto newHashArray = reinterpret_cast<unsigned*>(newTableArray + newSize + 1);
63 
64  // Rehash all the items into their new buckets. Luckily we already have
65  // the hash values available, so we don't have to rehash any strings.
66  unsigned* hashTable = getHashTable();
67  for (unsigned i = 0; i != numBuckets; ++i) {
68  StringMapEntryBase* bucket = theTable[i];
69  if (bucket && (bucket != getTombstoneVal())) {
70  unsigned fullHash = hashTable[i];
71  unsigned newBucket = fullHash & (newSize - 1);
72  if (!newTableArray[newBucket]) {
73  // Fast case, bucket available.
74  newTableArray[newBucket] = bucket;
75  newHashArray [newBucket] = fullHash;
76  } else {
77  // Otherwise probe for a spot (quadratic).
78  unsigned probeSize = 1;
79  do {
80  newBucket = (newBucket + probeSize++) & (newSize - 1);
81  } while (newTableArray[newBucket]);
82  newTableArray[newBucket] = bucket;
83  newHashArray[newBucket] = fullHash;
84  }
85  }
86  }
87 
88  free(theTable);
89  theTable = newTableArray;
90  numBuckets = newSize;
91  numTombstones = 0;
92 }
93 
94 
95 template<bool CASE_SENSITIVE>
96 static inline uint32_t hash(string_ref key)
97 {
98  return (CASE_SENSITIVE) ? xxhash(key) : xxhash_case(key);
99 }
100 
101 template<bool CASE_SENSITIVE>
102 static inline bool equal(string_ref x, string_ref y)
103 {
104  if (CASE_SENSITIVE) {
105  return x == y;
106  } else {
107  StringOp::casecmp cmp;
108  return cmp(x, y);
109  }
110 }
111 
112 template<bool CASE_SENSITIVE>
113 StringMapImpl2<CASE_SENSITIVE>::StringMapImpl2(unsigned itemSize, unsigned initSize)
114  : StringMapImpl(itemSize, initSize)
115 {
116 }
117 
118 template<bool CASE_SENSITIVE>
120 {
121  if (numBuckets == 0) { // Hash table unallocated so far?
122  init(16);
123  }
124  unsigned fullHashValue = hash<CASE_SENSITIVE>(name);
125  unsigned bucketNo = fullHashValue & (numBuckets - 1);
126  unsigned* hashTable = getHashTable();
127 
128  unsigned probeAmt = 1;
129  int firstTombstone = -1;
130  while (true) {
131  StringMapEntryBase* bucketItem = theTable[bucketNo];
132  if (!bucketItem) {
133  // Empty bucket, this means the key isn't in the table
134  // yet. If we found a tombstone earlier, then reuse
135  // that instead of using this empty bucket.
136  if (firstTombstone != -1) {
137  hashTable[firstTombstone] = fullHashValue;
138  return firstTombstone;
139  }
140  hashTable[bucketNo] = fullHashValue;
141  return bucketNo;
142  } else if (bucketItem == getTombstoneVal()) {
143  // Skip tombstones, but remember the first one we see.
144  if (firstTombstone == -1) firstTombstone = bucketNo;
145  } else if (hashTable[bucketNo] == fullHashValue) {
146  // If the full hash value matches, check deeply for a
147  // match. The common case here is that we are only
148  // looking at the buckets (for item info being non-nullptr
149  // and for the full hash value) not at the items. This
150  // is important for cache locality.
151  auto itemStr = reinterpret_cast<char*>(bucketItem) + itemSize;
152  if (equal<CASE_SENSITIVE>(
153  name, string_ref(itemStr, bucketItem->getKeyLength()))) {
154  return bucketNo;
155  }
156  }
157 
158  // Use quadratic probing, it has fewer clumping artifacts than linear
159  // probing and has good cache behavior in the common case.
160  bucketNo = (bucketNo + probeAmt) & (numBuckets - 1);
161  ++probeAmt;
162  }
163 }
164 
165 template<bool CASE_SENSITIVE>
167 {
168  if (numBuckets == 0) return -1;
169 
170  unsigned fullHashValue = hash<CASE_SENSITIVE>(key);
171  unsigned bucketNo = fullHashValue & (numBuckets - 1);
172  unsigned* hashTable = getHashTable();
173 
174  unsigned probeAmt = 1;
175  while (true) {
176  StringMapEntryBase* bucketItem = theTable[bucketNo];
177  if (!bucketItem) {
178  // Empty bucket, key isn't in the table yet.
179  return -1;
180  } else if (bucketItem == getTombstoneVal()) {
181  // Ignore tombstones.
182  } else if (hashTable[bucketNo] == fullHashValue) {
183  // Hash matches, compare full string.
184  auto itemStr = reinterpret_cast<char*>(bucketItem) + itemSize;
185  if (equal<CASE_SENSITIVE>(
186  key, string_ref(itemStr, bucketItem->getKeyLength()))) {
187  return bucketNo;
188  }
189  }
190  // Quadratic probing.
191  bucketNo = (bucketNo + probeAmt) & (numBuckets - 1);
192  ++probeAmt;
193  }
194 }
195 
196 template<bool CASE_SENSITIVE>
198 {
199  auto vStr = reinterpret_cast<char*>(v) + itemSize;
200  StringMapEntryBase* v2 = removeKey(string_ref(vStr, v->getKeyLength()));
201  assert(v == v2 && "Didn't find key?"); (void)v2;
202 }
203 
204 template<bool CASE_SENSITIVE>
206 {
207  int bucket = findKey(key);
208  if (bucket == -1) return nullptr;
209 
210  StringMapEntryBase* result = theTable[bucket];
211  theTable[bucket] = getTombstoneVal();
212  --numItems;
213  ++numTombstones;
214  assert(numItems + numTombstones <= numBuckets);
215  return result;
216 }
217 
218 template class StringMapImpl2<true>;
219 template class StringMapImpl2<false>;
uint32_t xxhash_case(string_ref key)
Definition: xxhash.hh:146
#define unlikely(x)
Definition: likely.hh:15
unsigned numBuckets
Definition: StringMap.hh:83
unsigned * getHashTable() const
Definition: StringMap.hh:75
void init(unsigned size)
Definition: StringMap.cc:20
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
StringMapImpl(unsigned itemSize, unsigned initSize)
Definition: StringMap.cc:5
static StringMapEntryBase * getTombstoneVal()
Definition: StringMap.hh:58
unsigned lookupBucketFor(string_ref key)
Definition: StringMap.cc:119
Non-templatized base class of StringMap.
Definition: StringMap.hh:55
StringMapImpl2(unsigned itemSize, unsigned initSize)
Definition: StringMap.cc:113
void rehashTable()
Definition: StringMap.cc:40
unsigned getKeyLength() const
Definition: StringMap.hh:48
uint32_t xxhash(string_ref key)
Definition: xxhash.hh:142
unsigned numTombstones
Definition: StringMap.hh:85
unsigned numItems
Definition: StringMap.hh:84
int findKey(string_ref key) const
Definition: StringMap.cc:166
void removeKey(StringMapEntryBase *V)
Definition: StringMap.cc:197
StringMapEntryBase ** theTable
Definition: StringMap.hh:82