openMSX
TigerTree.cc
Go to the documentation of this file.
1 #include "TigerTree.hh"
2 #include "tiger.hh"
3 #include "Math.hh"
4 #include "MemBuffer.hh"
5 #include <map>
6 #include <cstring>
7 #include <cassert>
8 
9 namespace openmsx {
10 
11 static const size_t BLOCK_SIZE = 1024;
12 
14 {
17  size_t numNodes;
18  time_t time = -1;
19  size_t numNodesValid;
20 };
21 // Typically contains 0 or 1 element, and only rarely 2 or more. But we need
22 // the address of existing elements to remain stable when new elements are
23 // inserted. So still use std::map instead of std::vector.
24 static std::map<std::pair<size_t, std::string>, TTCacheEntry> ttCache;
25 
26 static size_t calcNumNodes(size_t dataSize)
27 {
28  auto numBlocks = (dataSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
29  return (numBlocks == 0) ? 1 : 2 * numBlocks - 1;
30 }
31 
32 static TTCacheEntry& getCacheEntry(
33  TTData& data, size_t dataSize, const std::string& name)
34 {
35  auto& result = ttCache[std::make_pair(dataSize, name)];
36  if (!data.isCacheStillValid(result.time)) { // note: has side effect
37  size_t numNodes = calcNumNodes(dataSize);
38  result.hash .resize(numNodes);
39  result.valid.resize(numNodes);
40  result.numNodes = numNodes;
41  memset(result.valid.data(), 0, numNodes); // all invalid
42  result.numNodesValid = 0;
43  }
44  return result;
45 }
46 
47 TigerTree::TigerTree(TTData& data_, size_t dataSize_, const std::string& name)
48  : data(data_)
49  , dataSize(dataSize_)
50  , entry(getCacheEntry(data, dataSize, name))
51 {
52 }
53 
54 const TigerHash& TigerTree::calcHash(const std::function<void(size_t, size_t)>& progressCallback)
55 {
56  return calcHash(getTop(), progressCallback);
57 }
58 
59 void TigerTree::notifyChange(size_t offset, size_t len, time_t time)
60 {
61  entry.time = time;
62 
63  assert((offset + len) <= dataSize);
64  if (len == 0) return;
65 
66  if (entry.valid[getTop().n]) {
67  entry.valid[getTop().n] = false; // set sentinel
68  entry.numNodesValid--;
69  }
70  auto first = offset / BLOCK_SIZE;
71  auto last = (offset + len - 1) / BLOCK_SIZE;
72  assert(first <= last); // requires len != 0
73  do {
74  auto node = getLeaf(first);
75  while (entry.valid[node.n]) {
76  entry.valid[node.n] = false;
77  entry.numNodesValid--;
78  node = getParent(node);
79  }
80  } while (++first <= last);
81 }
82 
83 const TigerHash& TigerTree::calcHash(Node node, const std::function<void(size_t, size_t)>& progressCallback)
84 {
85  auto n = node.n;
86  if (!entry.valid[n]) {
87  if (n & 1) {
88  // interior node
89  auto left = getLeftChild (node);
90  auto right = getRightChild(node);
91  auto& h1 = calcHash(left, progressCallback);
92  auto& h2 = calcHash(right, progressCallback);
93  tiger_int(h1, h2, entry.hash[n]);
94  } else {
95  // leaf node
96  size_t b = n * (BLOCK_SIZE / 2);
97  size_t l = dataSize - b;
98 
99  if (l >= BLOCK_SIZE) {
100  auto* d = data.getData(b, BLOCK_SIZE);
101  tiger_leaf(d, entry.hash[n]);
102  } else {
103  // partial last block
104  auto* d = data.getData(b, l);
105  auto backup = d[-1];
106  d[-1] = 0;
107  tiger(d - 1, l + 1, entry.hash[n]);
108  d[-1] = backup;
109  }
110  }
111  entry.valid[n] = true;
112  entry.numNodesValid++;
113  if (progressCallback) {
114  progressCallback(entry.numNodesValid, entry.numNodes);
115  }
116  }
117  return entry.hash[n];
118 }
119 
120 
121 // The TigerTree::nodes member variable stores a linearized binary tree. The
122 // linearization is done like in this example:
123 //
124 // 7 (level = 8)
125 // ----/ \---- .
126 // 3 11 (level = 4)
127 // / \ / \ .
128 // 1 5 9 | (level = 2)
129 // / \ / \ / \ | .
130 // 0 2 4 6 8 10 12 (level = 1)
131 //
132 // All leaf nodes are given even node values (0, 2, 4, .., 12). Leaf nodes have
133 // level=1. At the next level (level=2) leaf nodes are pair-wise combined in
134 // internal nodes. So nodes 0 and 2 are combined in node 1, 4+6->5 and 8+10->9.
135 // Leaf-node 12 cannot be paired (there is no leaf-node 14), so there's no
136 // corresponding node at level=2. The next level is level=4 (level values
137 // double for each higher level). At level=4 node 3 is the combination of 1 and
138 // 5 and 9+12->11. Note that 11 is a combination of two nodes from a different
139 // (lower) level. And finally, node 7 at level=8 combines 3+11.
140 //
141 // The following methods navigate in this tree.
142 
143 TigerTree::Node TigerTree::getTop() const
144 {
145  auto n = Math::floodRight(entry.numNodes / 2);
146  return {n, n + 1};
147 }
148 
149 TigerTree::Node TigerTree::getLeaf(size_t block) const
150 {
151  assert((2 * block) < entry.numNodes);
152  return {2 * block, 1};
153 }
154 
155 TigerTree::Node TigerTree::getParent(Node node) const
156 {
157  assert(node.n < entry.numNodes);
158  do {
159  node.n = (node.n & ~(2 * node.l)) + node.l;
160  node.l *= 2;
161  } while (node.n >= entry.numNodes);
162  return node;
163 }
164 
165 TigerTree::Node TigerTree::getLeftChild(Node node) const
166 {
167  assert(node.n < entry.numNodes);
168  assert(node.l > 1);
169  node.l /= 2;
170  node.n -= node.l;
171  return node;
172 }
173 
174 TigerTree::Node TigerTree::getRightChild(Node node) const
175 {
176  assert(node.n < entry.numNodes);
177  while (true) {
178  assert(node.l > 1);
179  node.l /= 2;
180  auto r = node.n + node.l;
181  if (r < entry.numNodes) return {r, node.l};
182  }
183 }
184 
185 } // namespace openmsx
virtual uint8_t * getData(size_t offset, size_t size)=0
Return the requested portion of the to-be-hashed data block.
This struct represents the result of a tiger-hash.
Definition: tiger.hh:27
The TigerTree class will query the to-be-hashed data via this abstract interface. ...
Definition: TigerTree.hh:43
void notifyChange(size_t offset, size_t len, time_t time)
Inform this calculator about changes in the input data.
Definition: TigerTree.cc:59
constexpr T floodRight(T x) noexcept
Returns the smallest number of the form 2^n-1 that is greater or equal to the given number...
Definition: Math.hh:68
MemBuffer< bool > valid
Definition: TigerTree.cc:16
TigerTree(TTData &data, size_t dataSize, const std::string &name)
Create TigerTree calculator for the given (abstract) data block of given size.
Definition: TigerTree.cc:47
void tiger(const uint8_t *str, size_t length, TigerHash &result)
Generic function to calculate a tiger-hash.
Definition: tiger.cc:671
void tiger_int(const TigerHash &h0, const TigerHash &h1, TigerHash &result)
Use for tiger-tree internal node hash calculations.
Definition: tiger.cc:709
constexpr auto data(C &c) -> decltype(c.data())
Definition: span.hh:69
void tiger_leaf(uint8_t data[1024], TigerHash &result)
Use for tiger-tree leaf node hash calculations.
Definition: tiger.cc:729
MemBuffer< TigerHash > hash
Definition: TigerTree.cc:15
const TigerHash & calcHash(const std::function< void(size_t, size_t)> &progressCallback)
Calculate the hash value.
Definition: TigerTree.cc:54
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
virtual bool isCacheStillValid(time_t &time)=0
Because TTH calculation of a large file takes some time (a few 1/10s for a harddisk image) we try to ...
This class manages the lifetime of a block of memory.
Definition: MemBuffer.hh:37