openMSX
FilePool.cc
Go to the documentation of this file.
1 #include "FilePool.hh"
2 #include "File.hh"
3 #include "FileException.hh"
4 #include "FileContext.hh"
5 #include "FileOperations.hh"
6 #include "TclObject.hh"
7 #include "ReadDir.hh"
8 #include "Date.hh"
9 #include "CommandController.hh"
10 #include "CommandException.hh"
11 #include "Display.hh"
12 #include "EventDistributor.hh"
13 #include "CliComm.hh"
14 #include "Reactor.hh"
15 #include "Timer.hh"
16 #include "StringOp.hh"
17 #include "memory.hh"
18 #include "sha1.hh"
19 #include "stl.hh"
20 #include <fstream>
21 #include <cassert>
22 
23 using std::ifstream;
24 using std::get;
25 using std::make_tuple;
26 using std::ofstream;
27 using std::pair;
28 using std::string;
29 using std::vector;
30 using std::unique_ptr;
31 
32 namespace openmsx {
33 
// Name of the sha1sum cache file. It is appended to the user data directory
// by readSha1sums() and writeSha1sums(); the file holds one
// "sha1sum date filename" record per line.
34 const char* const FILE_CACHE = "/.filecache";
35 
36 static string initialFilePoolSettingValue()
37 {
38  TclObject result;
39 
40  for (auto& p : systemFileContext().getPaths()) {
41  TclObject entry1;
42  entry1.addListElement("-path");
43  entry1.addListElement(FileOperations::join(p, "systemroms"));
44  entry1.addListElement("-types");
45  entry1.addListElement("system_rom");
46  result.addListElement(entry1);
47 
48  TclObject entry2;
49  entry2.addListElement("-path");
50  entry2.addListElement(FileOperations::join(p, "software"));
51  entry2.addListElement("-types");
52  entry2.addListElement("rom disk tape");
53  result.addListElement(entry2);
54  }
55  return result.getString().str();
56 }
57 
// Member initializers and body of the FilePool constructor.
// NOTE(review): the constructor signature (listing line 58) and listing
// line 68 are missing from this extract, so the block is incomplete as shown.
59  : filePoolSetting(
60  controller, "__filepool",
61  "This is an internal setting. Don't change this directly, "
62  "instead use the 'filepool' command.",
63  initialFilePoolSettingValue())
64  , reactor(reactor_)
65  , quit(false)
66 {
// Observe the setting so that update() is called when its value changes.
67  filePoolSetting.attach(*this);
// Load the cached sha1sums from disk into 'pool'.
69  readSha1sums();
// Start clean: the destructor only rewrites the cache file when 'needWrite'
// has been set afterwards (by insert(), remove(), adjust() or a timestamp
// update in getFromPool()).
70  needWrite = false;
71 }
72 
// Teardown counterpart of the constructor (presumably the destructor body).
// NOTE(review): the signature (listing line 73) and listing line 78 are
// missing from this extract.
74 {
// Persist the pool only when it was modified since it was loaded.
75  if (needWrite) {
76  writeSha1sums();
77  }
// Stop observing the '__filepool' setting (mirrors attach() in the constructor).
79  filePoolSetting.detach(*this);
80 }
81 
// Add a (sha1sum, modification time, filename) entry to the pool and mark the
// pool as needing to be written back to the cache file.
// The insert position comes from upper_bound(), i.e. the new entry is placed
// after any existing entries that compare equal to 'sum'.
// NOTE(review): the last argument of the upper_bound() call (listing line 85)
// is missing from this extract.
82 void FilePool::insert(const Sha1Sum& sum, time_t time, const string& filename)
83 {
84  auto it = upper_bound(begin(pool), end(pool), sum,
86  pool.insert(it, make_tuple(sum, time, filename));
87  needWrite = true;
88 }
89 
90 void FilePool::remove(Pool::iterator it)
91 {
92  pool.erase(it);
93  needWrite = true;
94 }
95 
96 // Change the sha1sum of the element pointed to by 'it' into 'newSum'.
97 // Also re-arrange the items so that pool remains sorted on sha1sum. Internally
98 // this method doesn't actually sort, it merely rotates the elements.
99 // Returns false if the new position is before (or at) the old position.
100 // Returns true if the new position is after the old position.
// Always sets 'needWrite'.
// NOTE(review): the last argument of the upper_bound() call (listing line 105)
// is missing from this extract.
101 bool FilePool::adjust(Pool::iterator it, const Sha1Sum& newSum)
102 {
103  needWrite = true;
// Locate the destination while the element still carries its old sum.
104  auto newIt = upper_bound(begin(pool), end(pool), newSum,
106  get<0>(*it) = newSum; // update sum
107  if (newIt > it) {
108  // move to back
// Elements in (it, newIt) shift one position towards the front; the
// updated element ends up at newIt - 1.
109  rotate(it, it + 1, newIt);
110  return true;
111  } else {
112  if (newIt < it) {
113  // move to front
// The updated element moves to newIt; elements in [newIt, it) shift one
// position towards the back.
114  rotate(newIt, it, it + 1);
115  } else {
116  // (unlikely) sha1sum has changed, but after
117  // resorting item would remain in the same
118  // position
119  }
120  return false;
121  }
122 }
123 
124 static bool parse(const string& line, Sha1Sum& sha1, time_t& time, string& filename)
125 {
126  if (line.size() <= 68) return false;
127 
128  try {
129  sha1.parse40(line.data());
130  } catch (MSXException& /*e*/) {
131  return false;
132  }
133 
134  time = Date::fromString(line.data() + 42);
135  if (time == time_t(-1)) return false;
136 
137  filename.assign(line, 68, line.size());
138  return true;
139 }
140 
141 void FilePool::readSha1sums()
142 {
143  assert(pool.empty());
144 
145  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
146  ifstream file(cacheFile.c_str());
147  string line;
148  Sha1Sum sum;
149  string filename;
150  time_t time;
151  while (file.good()) {
152  getline(file, line);
153  if (parse(line, sum, time, filename)) {
154  pool.emplace_back(sum, time, filename);
155  }
156  }
157 
158  if (!std::is_sorted(begin(pool), end(pool), LessTupleElement<0>())) {
159  // This should _rarely_ happen. In fact it should only happen
160  // when .filecache was manually edited. Though because it's
161  // very important that pool is indeed sorted I've added this
162  // safety mechanism.
163  sort(begin(pool), end(pool), LessTupleElement<0>());
164  }
165 }
166 
167 void FilePool::writeSha1sums()
168 {
169  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
170  ofstream file;
171  FileOperations::openofstream(file, cacheFile);
172  if (!file.is_open()) {
173  return;
174  }
175  for (auto& p : pool) {
176  file << get<0>(p).toString() << " " // sum
177  << Date::toString(get<1>(p)) << " " // date
178  << get<2>(p) // filename
179  << '\n';
180  }
181 }
182 
183 static int parseTypes(Interpreter& interp, const TclObject& list)
184 {
185  int result = 0;
186  unsigned num = list.getListLength(interp);
187  for (unsigned i = 0; i < num; ++i) {
188  string_ref elem = list.getListIndex(interp, i).getString();
189  if (elem == "system_rom") {
190  result |= FilePool::SYSTEM_ROM;
191  } else if (elem == "rom") {
192  result |= FilePool::ROM;
193  } else if (elem == "disk") {
194  result |= FilePool::DISK;
195  } else if (elem == "tape") {
196  result |= FilePool::TAPE;
197  } else {
198  throw CommandException("Unknown type: " + elem);
199  }
200  }
201  return result;
202 }
203 
204 void FilePool::update(const Setting& setting)
205 {
206  assert(&setting == &filePoolSetting); (void)setting;
207  getDirectories(); // check for syntax errors
208 }
209 
210 FilePool::Directories FilePool::getDirectories() const
211 {
212  Directories result;
213  auto& interp = filePoolSetting.getInterpreter();
214  const TclObject& all = filePoolSetting.getValue();
215  unsigned numLines = all.getListLength(interp);
216  for (unsigned i = 0; i < numLines; ++i) {
217  Entry entry;
218  bool hasPath = false;
219  entry.types = 0;
220  TclObject line = all.getListIndex(interp, i);
221  unsigned numItems = line.getListLength(interp);
222  if (numItems & 1) {
223  throw CommandException(
224  "Expected a list with an even number "
225  "of elements, but got " + line.getString());
226  }
227  for (unsigned j = 0; j < numItems; j += 2) {
228  string_ref name = line.getListIndex(interp, j + 0).getString();
229  TclObject value = line.getListIndex(interp, j + 1);
230  if (name == "-path") {
231  entry.path = value.getString().str();
232  hasPath = true;
233  } else if (name == "-types") {
234  entry.types = parseTypes(interp, value);
235  } else {
236  throw CommandException(
237  "Unknown item: " + name);
238  }
239  }
240  if (!hasPath) {
241  throw CommandException(
242  "Missing -path item: " + line.getString());
243  }
244  if (entry.types == 0) {
245  throw CommandException(
246  "Missing -types item: " + line.getString());
247  }
248  result.push_back(entry);
249  }
250  return result;
251 }
252 
253 File FilePool::getFile(FileType fileType, const Sha1Sum& sha1sum)
254 {
255  File result = getFromPool(sha1sum);
256  if (result.is_open()) return result;
257 
258  // not found in cache, need to scan directories
259  ScanProgress progress;
260  progress.lastTime = Timer::getTime();
261  progress.amountScanned = 0;
262 
263  Directories directories;
264  try {
265  directories = getDirectories();
266  } catch (CommandException& e) {
267  reactor.getCliComm().printWarning(
268  "Error while parsing '__filepool' setting" + e.getMessage());
269  }
270  for (auto& d : directories) {
271  if (d.types & fileType) {
272  string path = FileOperations::expandTilde(d.path);
273  result = scanDirectory(sha1sum, path, d.path, progress);
274  if (result.is_open()) return result;
275  }
276  }
277 
278  return result; // not found
279 }
280 
281 static void reportProgress(const string& filename, size_t percentage,
282  Reactor& reactor)
283 {
284  reactor.getCliComm().printProgress(
285  "Calculating SHA1 sum for " + filename + "... " + StringOp::toString(percentage) + '%');
286  reactor.getDisplay().repaint();
287 }
288 
289 static Sha1Sum calcSha1sum(File& file, Reactor& reactor)
290 {
291  // Calculate sha1 in several steps so that we can show progress
292  // information. We take a fixed step size for an efficient calculation.
293  static const size_t STEP_SIZE = 1024 * 1024; // 1MB
294 
295  size_t size;
296  const byte* data = file.mmap(size);
297  string filename = file.getOriginalName();
298 
299  SHA1 sha1;
300  size_t done = 0;
301  size_t remaining = size;
302  auto lastShowedProgress = Timer::getTime();
303  bool everShowedProgress = false;
304 
305  // Loop over all-but-the last blocks. For small files this loop is skipped.
306  while (remaining > STEP_SIZE) {
307  sha1.update(&data[done], STEP_SIZE);
308  done += STEP_SIZE;
309  remaining -= STEP_SIZE;
310 
311  auto now = Timer::getTime();
312  if ((now - lastShowedProgress) > 1000000) {
313  reportProgress(filename, (100 * done) / size, reactor);
314  lastShowedProgress = now;
315  everShowedProgress = true;
316  }
317  }
318  // last block
319  sha1.update(&data[done], remaining);
320  if (everShowedProgress) {
321  reportProgress(filename, 100, reactor);
322  }
323  return sha1.digest();
324 }
325 
// Search the in-memory pool for a file with the given sha1sum.
// Returns an open File for the first usable entry, or a default constructed
// File when no entry is usable. While searching, the pool is repaired:
// entries whose file has a changed modification time are re-hashed (and
// moved when the sum changed), entries whose file cannot be read are removed.
// NOTE(review): the last argument of the equal_range() call (listing line 329)
// is missing from this extract.
326 File FilePool::getFromPool(const Sha1Sum& sha1sum)
327 {
328  auto bound = equal_range(begin(pool), end(pool), sha1sum,
330  // use indices instead of iterators
// (remove() and adjust() below erase/rotate elements of 'pool', so the
// candidate range [i, last) is tracked by position)
331  auto i = distance(begin(pool), bound.first);
332  auto last = distance(begin(pool), bound.second);
333  while (i != last) {
334  auto it = begin(pool) + i;
335  auto& time = get<1>(*it);
336  const auto& filename = get<2>(*it);
337  try {
338  File file(filename);
339  auto newTime = file.getModificationDate();
340  if (time == newTime) {
341  // When modification time is unchanged, assume
342  // sha1sum is also unchanged. So avoid
343  // expensive sha1sum calculation.
344  return file;
345  }
346  time = newTime; // update timestamp
347  needWrite = true;
348  auto newSum = calcSha1sum(file, reactor);
349  if (newSum == sha1sum) {
350  // Modification time was changed, but
351  // (recalculated) sha1sum is still the same.
352  return file;
353  }
354  // Sha1sum has changed: update sha1sum, move entry to
355  // new position new sum and continue searching.
356  if (adjust(it, newSum)) {
357  // after
// The entry left the candidate range towards the back: the following
// candidates shifted one position forward, so index 'i' already refers
// to the next candidate and the range is one element shorter.
358  --last; // no ++i
359  } else {
360  // before (or at)
361  ++i;
362  }
363  } catch (FileException&) {
364  // Error reading file: remove from db and continue
365  // searching.
366  remove(it);
// Erasing shifts the remaining candidates forward: keep 'i', shrink range.
367  --last;
368  }
369  }
370  return File(); // not found
371 }
372 
// Recursively scan 'directory' for a file with the given sha1sum.
// Entries are handed to scanFile() or, for sub-directories other than "."
// and "..", to a recursive scanDirectory() call. 'poolPath' and 'progress'
// are passed through unchanged for progress reporting.
// Returns the first open File that was found, or a default constructed File
// when nothing was found or when 'quit' was set during the scan.
// NOTE(review): listing lines 387 and 390 are missing from this extract (the
// declaration of 'st' and the condition guarding the scanFile() call), so
// the block is incomplete as shown.
373 File FilePool::scanDirectory(
374  const Sha1Sum& sha1sum, const string& directory, const string& poolPath,
375  ScanProgress& progress)
376 {
377  ReadDir dir(directory);
378  while (dirent* d = dir.getEntry()) {
379  if (quit) {
380  // Scanning can take a long time. Allow to exit
381  // openmsx when it takes too long. Stop scanning
382  // by pretending we didn't find the file.
383  return File();
384  }
385  string file = d->d_name;
386  string path = directory + '/' + file;
// Entries for which getStat() fails are skipped.
388  if (FileOperations::getStat(path, st)) {
389  File result;
391  result = scanFile(sha1sum, path, st, poolPath, progress);
392  } else if (FileOperations::isDirectory(st)) {
393  if ((file != ".") && (file != "..")) {
394  result = scanDirectory(sha1sum, path, poolPath, progress);
395  }
396  }
397  if (result.is_open()) return result;
398  }
399  }
400  return File(); // not found
401 }
402 
// Examine a single file during a directory scan and bring the pool up to
// date for it.
// - File not yet in the pool: calculate its sha1sum and insert it.
// - File in the pool with an unchanged modification time: trust the stored sum.
// - File in the pool with a changed modification time: recalculate the sum
//   and update/move the entry.
// - File cannot be read: an existing entry is removed, otherwise ignored.
// Returns an open File when this file has the requested sha1sum, otherwise a
// default constructed File.
403 File FilePool::scanFile(const Sha1Sum& sha1sum, const string& filename,
404  const FileOperations::Stat& st, const string& poolPath,
405  ScanProgress& progress)
406 {
407  ++progress.amountScanned;
408  // Periodically send a progress message with the current filename
409  auto now = Timer::getTime();
410  if (now > (progress.lastTime + 250000)) { // 4Hz
411  progress.lastTime = now;
// The filename is shown relative to the pool path: the first
// poolPath.size() characters are cut off.
412  reactor.getCliComm().printProgress("Searching for file with sha1sum " +
413  sha1sum.toString() + "...\nIndexing filepool " + poolPath +
414  ": [" + StringOp::toString(progress.amountScanned) + "]: " +
415  filename.substr(poolPath.size()));
416  }
417 
418  // deliverEvents() is relatively cheap when there are no events to
419  // deliver, so it's ok to call on each file.
// NOTE(review): the statement this comment refers to (listing line 420) is
// missing from this extract.
421 
422  auto it = findInDatabase(filename);
423  if (it == end(pool)) {
424  // not in pool
425  try {
426  File file(filename);
427  auto sum = calcSha1sum(file, reactor);
428  auto time = FileOperations::getModificationDate(st);
429  insert(sum, time, filename);
430  if (sum == sha1sum) {
431  return file;
432  }
433  } catch (FileException&) {
434  // ignore
435  }
436  } else {
437  // already in pool
438  assert(filename == get<2>(*it));
439  try {
440  auto time = FileOperations::getModificationDate(st);
441  if (time == get<1>(*it)) {
442  // db is still up to date
443  if (get<0>(*it) == sha1sum) {
444  return File(filename);
445  }
446  } else {
447  // db outdated
448  File file(filename);
449  auto sum = calcSha1sum(file, reactor);
450  get<1>(*it) = time;
// adjust() may move the entry, so 'it' is not used after this call.
451  adjust(it, sum);
452  if (sum == sha1sum) {
453  return file;
454  }
455  }
456  } catch (FileException&) {
457  // error reading file, remove from db
458  remove(it);
459  }
460  }
461  return File(); // not found
462 }
463 
464 FilePool::Pool::iterator FilePool::findInDatabase(const string& filename)
465 {
466  // Linear search in pool for filename.
467  // Search from back to front because often, soon after this search, we
468  // will insert/remove an element from the vector. This requires
469  // shifting all elements in the vector starting from a certain
470  // position. Starting the search from the back increases the likelihood
471  // that the to-be-shifted elements are already in the memory cache.
472  for (auto it = pool.rbegin(); it != pool.rend(); ++it) {
473  if (get<2>(*it) == filename) {
474  return it.base() - 1;
475  }
476  }
477  return end(pool); // not found
478 }
479 
// Body of the function that returns the sha1sum of 'file', using the pool as
// a cache keyed on the file's URL.
// A stored sum is reused when the stored modification time equals the file's
// current one; otherwise the sum is calculated and the pool is updated (new
// entry inserted, or existing entry corrected).
// NOTE(review): the signature (listing line 480) is missing from this extract.
481 {
482  auto time = file.getModificationDate();
483  const auto& filename = file.getURL();
484 
485  auto it = findInDatabase(filename);
486  if ((it != end(pool)) && (get<1>(*it) == time)) {
487  // in database and modification time matches,
488  // assume sha1sum also matches
489  return get<0>(*it);
490  }
491 
492  // not in database or timestamp mismatch
493  auto sum = calcSha1sum(file, reactor);
494  if (it == end(pool)) {
495  // was not yet in database, insert new entry
496  insert(sum, time, filename);
497  } else {
498  // was already in database, but with wrong timestamp (and sha1sum)
499  get<1>(*it) = time;
// adjust() stores the new sum and moves the entry to keep the pool sorted.
500  adjust(it, sum);
501  }
502  return sum;
503 }
504 
505 int FilePool::signalEvent(const std::shared_ptr<const Event>& event)
506 {
507  (void)event; // avoid warning for non-assert compiles
508  assert(event->getType() == OPENMSX_QUIT_EVENT);
509  quit = true;
510  return 0;
511 }
512 
513 } // namespace openmsx
Contains the main loop of openMSX.
Definition: Reactor.hh:61
bool isRegularFile(const Stat &st)
void update(const uint8_t *data, size_t len)
Incrementally calculate the hash value.
Definition: sha1.cc:222
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:167
bool is_open() const
Return true iff this file handle refers to an open file.
Definition: File.hh:57
std::string str() const
Definition: string_ref.cc:12
void registerEventListener(EventType type, EventListener &listener, Priority priority=OTHER)
Registers a given object to receive certain events.
string_ref getString() const
Definition: TclObject.cc:139
File getFile(FileType fileType, const Sha1Sum &sha1sum)
Search file with the given sha1sum.
Definition: FilePool.cc:253
void unregisterEventListener(EventType type, EventListener &listener)
Unregisters a previously registered event listener.
FileContext systemFileContext()
Definition: FileContext.cc:149
string toString(long long a)
Definition: StringOp.cc:156
void openofstream(std::ofstream &stream, const std::string &filename)
Open an ofstream in a platform-independent manner.
void printWarning(string_ref message)
Definition: CliComm.cc:28
FilePool(CommandController &controler, Reactor &reactor)
Definition: FilePool.cc:58
string join(string_ref part1, string_ref part2)
Join two paths.
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
T sum(const vecN< N, T > &x)
Definition: gl_vec.hh:289
EventDistributor & getEventDistributor()
Definition: Reactor.hh:76
void deliverEvents()
This actually delivers the events.
bool getStat(string_ref filename_, Stat &st)
Call stat() and return the stat structure.
void attach(Observer< T > &observer)
Definition: Subject.hh:52
void repaint()
Redraw the display.
Definition: Display.cc:317
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:19
Sha1Sum digest()
Get the final hash.
Definition: sha1.cc:259
TclObject getListIndex(Interpreter &interp, unsigned index) const
Definition: TclObject.cc:170
unsigned getListLength(Interpreter &interp) const
Definition: TclObject.cc:152
const std::string & getMessage() const
Definition: MSXException.hh:14
string getUserDataDir()
Get the openMSX data dir in the user's home directory.
time_t getModificationDate()
Get the date/time of last modification.
Definition: File.cc:149
Interpreter & getInterpreter() const
Definition: Setting.cc:162
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
Helper class to perform a sha1 calculation.
Definition: sha1.hh:71
std::iterator_traits< octet_iterator >::difference_type distance(octet_iterator first, octet_iterator last)
const std::string getOriginalName()
Get Original filename for this object.
Definition: File.cc:138
unsigned char byte
8 bit unsigned integer
Definition: openmsx.hh:25
std::string toString() const
Definition: sha1.cc:131
void addListElement(string_ref element)
Definition: TclObject.cc:69
bool isDirectory(const Stat &st)
void parse40(const char *str)
Definition: sha1.cc:114
const char *const FILE_CACHE
Definition: FilePool.cc:34
void printProgress(string_ref message)
Definition: CliComm.cc:38
uint8_t * data()
Sha1Sum getSha1Sum(File &file)
Calculate sha1sum for the given File object.
Definition: FilePool.cc:480
const std::string getURL() const
Returns the URL of this file object.
Definition: File.cc:128
time_t fromString(const char *p)
Definition: Date.cc:31
const TclObject & getValue() const finaloverride
Gets the current value of this setting as a TclObject.
Definition: Setting.hh:133
size_t size() const
Simple wrapper around opendir() / readdir() / closedir() functions.
Definition: ReadDir.hh:15
const byte * mmap(size_t &size)
Map file in memory.
Definition: File.cc:93
void detach(Observer< T > &observer)
Definition: Subject.hh:58
string expandTilde(string_ref path)
Expand the '~' character to the user's home directory.
CliComm & getCliComm()
Definition: Reactor.cc:265
Display & getDisplay()
Definition: Reactor.hh:80
uint64_t getTime()
Get current (real) time in us.
Definition: Timer.cc:8
mat4 rotate(float angle, const vec3 &axis)
Definition: gl_transform.hh:56
string_ref::const_iterator begin(const string_ref &x)
Definition: string_ref.hh:166
struct dirent * getEntry()
Get directory entry for next file.
Definition: ReadDir.cc:17
time_t getModificationDate(const Stat &st)
Get the date/time of last modification.