openMSX
FilePool.cc
Go to the documentation of this file.
1 #include "FilePool.hh"
2 #include "File.hh"
3 #include "FileException.hh"
4 #include "FileContext.hh"
5 #include "FileOperations.hh"
6 #include "TclObject.hh"
7 #include "ReadDir.hh"
8 #include "Date.hh"
9 #include "CommandController.hh"
10 #include "CommandException.hh"
11 #include "Display.hh"
12 #include "EventDistributor.hh"
13 #include "CliComm.hh"
14 #include "Reactor.hh"
15 #include "Timer.hh"
16 #include "StringOp.hh"
17 #include "memory.hh"
18 #include "sha1.hh"
19 #include "stl.hh"
20 #include <fstream>
21 #include <cassert>
22 
23 using std::ifstream;
24 using std::get;
25 using std::make_tuple;
26 using std::ofstream;
27 using std::string;
28 using std::vector;
29 
30 namespace openmsx {
31 
32 class Sha1SumCommand final : public Command
33 {
34 public:
35  Sha1SumCommand(CommandController& commandController, FilePool& filePool);
36  void execute(array_ref<TclObject> tokens, TclObject& result) override;
37  string help(const vector<string>& tokens) const override;
38  void tabCompletion(vector<string>& tokens) const override;
39 private:
40  FilePool& filePool;
41 };
42 
43 
// Suffix (including the leading '/') that is appended to the user data
// directory to obtain the location of the sha1sum cache file.
constexpr const char* FILE_CACHE = "/.filecache";
45 
46 static string initialFilePoolSettingValue()
47 {
48  TclObject result;
49 
50  for (auto& p : systemFileContext().getPaths()) {
51  TclObject entry1;
52  entry1.addListElement("-path");
53  entry1.addListElement(FileOperations::join(p, "systemroms"));
54  entry1.addListElement("-types");
55  entry1.addListElement("system_rom");
56  result.addListElement(entry1);
57 
58  TclObject entry2;
59  entry2.addListElement("-path");
60  entry2.addListElement(FileOperations::join(p, "software"));
61  entry2.addListElement("-types");
62  entry2.addListElement("rom disk tape");
63  result.addListElement(entry2);
64  }
65  return result.getString().str();
66 }
67 
69  : filePoolSetting(
70  controller, "__filepool",
71  "This is an internal setting. Don't change this directly, "
72  "instead use the 'filepool' command.",
73  initialFilePoolSettingValue())
74  , reactor(reactor_)
75  , quit(false)
76 {
77  filePoolSetting.attach(*this);
79  readSha1sums();
80  needWrite = false;
81 
82  sha1SumCommand = make_unique<Sha1SumCommand>(controller, *this);
83 }
84 
86 {
87  if (needWrite) {
88  writeSha1sums();
89  }
91  filePoolSetting.detach(*this);
92 }
93 
94 void FilePool::insert(const Sha1Sum& sum, time_t time, const string& filename)
95 {
96  auto it = upper_bound(begin(pool), end(pool), sum,
98  pool.insert(it, make_tuple(sum, time, filename));
99  needWrite = true;
100 }
101 
102 void FilePool::remove(Pool::iterator it)
103 {
104  pool.erase(it);
105  needWrite = true;
106 }
107 
108 // Change the sha1sum of the element pointed to by 'it' into 'newSum'.
109 // Also re-arrange the items so that pool remains sorted on sha1sum. Internally
110 // this method doesn't actually sort, it merely rotates the elements.
111 // Returns false if the new position is before (or at) the old position.
112 // Returns true if the new position is after the old position.
113 bool FilePool::adjust(Pool::iterator it, const Sha1Sum& newSum)
114 {
115  needWrite = true;
116  auto newIt = upper_bound(begin(pool), end(pool), newSum,
118  get<0>(*it) = newSum; // update sum
119  if (newIt > it) {
120  // move to back
121  rotate(it, it + 1, newIt);
122  return true;
123  } else {
124  if (newIt < it) {
125  // move to front
126  rotate(newIt, it, it + 1);
127  } else {
128  // (unlikely) sha1sum has changed, but after
129  // resorting item would remain in the same
130  // position
131  }
132  return false;
133  }
134 }
135 
136 static bool parse(const string& line, Sha1Sum& sha1, time_t& time, string& filename)
137 {
138  if (line.size() <= 68) return false;
139 
140  try {
141  sha1.parse40(line.data());
142  } catch (MSXException& /*e*/) {
143  return false;
144  }
145 
146  time = Date::fromString(line.data() + 42);
147  if (time == time_t(-1)) return false;
148 
149  filename.assign(line, 68, line.size());
150  return true;
151 }
152 
153 void FilePool::readSha1sums()
154 {
155  assert(pool.empty());
156 
157  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
158  ifstream file(cacheFile.c_str());
159  string line;
160  Sha1Sum sum;
161  string filename;
162  time_t time;
163  while (file.good()) {
164  getline(file, line);
165  if (parse(line, sum, time, filename)) {
166  pool.emplace_back(sum, time, filename);
167  }
168  }
169 
170  if (!std::is_sorted(begin(pool), end(pool), LessTupleElement<0>())) {
171  // This should _rarely_ happen. In fact it should only happen
172  // when .filecache was manually edited. Though because it's
173  // very important that pool is indeed sorted I've added this
174  // safety mechanism.
175  sort(begin(pool), end(pool), LessTupleElement<0>());
176  }
177 }
178 
179 void FilePool::writeSha1sums()
180 {
181  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
182  ofstream file;
183  FileOperations::openofstream(file, cacheFile);
184  if (!file.is_open()) {
185  return;
186  }
187  for (auto& p : pool) {
188  file << get<0>(p).toString() << " " // sum
189  << Date::toString(get<1>(p)) << " " // date
190  << get<2>(p) // filename
191  << '\n';
192  }
193 }
194 
195 static int parseTypes(Interpreter& interp, const TclObject& list)
196 {
197  int result = 0;
198  unsigned num = list.getListLength(interp);
199  for (unsigned i = 0; i < num; ++i) {
200  string_ref elem = list.getListIndex(interp, i).getString();
201  if (elem == "system_rom") {
202  result |= FilePool::SYSTEM_ROM;
203  } else if (elem == "rom") {
204  result |= FilePool::ROM;
205  } else if (elem == "disk") {
206  result |= FilePool::DISK;
207  } else if (elem == "tape") {
208  result |= FilePool::TAPE;
209  } else {
210  throw CommandException("Unknown type: " + elem);
211  }
212  }
213  return result;
214 }
215 
216 void FilePool::update(const Setting& setting)
217 {
218  assert(&setting == &filePoolSetting); (void)setting;
219  getDirectories(); // check for syntax errors
220 }
221 
222 FilePool::Directories FilePool::getDirectories() const
223 {
224  Directories result;
225  auto& interp = filePoolSetting.getInterpreter();
226  const TclObject& all = filePoolSetting.getValue();
227  unsigned numLines = all.getListLength(interp);
228  for (unsigned i = 0; i < numLines; ++i) {
229  Entry entry;
230  bool hasPath = false;
231  entry.types = 0;
232  TclObject line = all.getListIndex(interp, i);
233  unsigned numItems = line.getListLength(interp);
234  if (numItems & 1) {
235  throw CommandException(
236  "Expected a list with an even number "
237  "of elements, but got " + line.getString());
238  }
239  for (unsigned j = 0; j < numItems; j += 2) {
240  string_ref name = line.getListIndex(interp, j + 0).getString();
241  TclObject value = line.getListIndex(interp, j + 1);
242  if (name == "-path") {
243  entry.path = value.getString().str();
244  hasPath = true;
245  } else if (name == "-types") {
246  entry.types = parseTypes(interp, value);
247  } else {
248  throw CommandException(
249  "Unknown item: " + name);
250  }
251  }
252  if (!hasPath) {
253  throw CommandException(
254  "Missing -path item: " + line.getString());
255  }
256  if (entry.types == 0) {
257  throw CommandException(
258  "Missing -types item: " + line.getString());
259  }
260  result.push_back(entry);
261  }
262  return result;
263 }
264 
265 File FilePool::getFile(FileType fileType, const Sha1Sum& sha1sum)
266 {
267  File result = getFromPool(sha1sum);
268  if (result.is_open()) return result;
269 
270  // not found in cache, need to scan directories
271  ScanProgress progress;
272  progress.lastTime = Timer::getTime();
273  progress.amountScanned = 0;
274 
275  Directories directories;
276  try {
277  directories = getDirectories();
278  } catch (CommandException& e) {
279  reactor.getCliComm().printWarning(
280  "Error while parsing '__filepool' setting" + e.getMessage());
281  }
282  for (auto& d : directories) {
283  if (d.types & fileType) {
284  string path = FileOperations::expandTilde(d.path);
285  result = scanDirectory(sha1sum, path, d.path, progress);
286  if (result.is_open()) return result;
287  }
288  }
289 
290  return result; // not found
291 }
292 
293 static void reportProgress(const string& filename, size_t percentage,
294  Reactor& reactor)
295 {
296  reactor.getCliComm().printProgress(
297  "Calculating SHA1 sum for " + filename + "... " + StringOp::toString(percentage) + '%');
298  reactor.getDisplay().repaint();
299 }
300 
301 static Sha1Sum calcSha1sum(File& file, Reactor& reactor)
302 {
303  // Calculate sha1 in several steps so that we can show progress
304  // information. We take a fixed step size for an efficient calculation.
305  static const size_t STEP_SIZE = 1024 * 1024; // 1MB
306 
307  size_t size;
308  const byte* data = file.mmap(size);
309  string filename = file.getOriginalName();
310 
311  SHA1 sha1;
312  size_t done = 0;
313  size_t remaining = size;
314  auto lastShowedProgress = Timer::getTime();
315  bool everShowedProgress = false;
316 
317  // Loop over all-but-the last blocks. For small files this loop is skipped.
318  while (remaining > STEP_SIZE) {
319  sha1.update(&data[done], STEP_SIZE);
320  done += STEP_SIZE;
321  remaining -= STEP_SIZE;
322 
323  auto now = Timer::getTime();
324  if ((now - lastShowedProgress) > 1000000) {
325  reportProgress(filename, (100 * done) / size, reactor);
326  lastShowedProgress = now;
327  everShowedProgress = true;
328  }
329  }
330  // last block
331  sha1.update(&data[done], remaining);
332  if (everShowedProgress) {
333  reportProgress(filename, 100, reactor);
334  }
335  return sha1.digest();
336 }
337 
338 File FilePool::getFromPool(const Sha1Sum& sha1sum)
339 {
340  auto bound = equal_range(begin(pool), end(pool), sha1sum,
342  // use indices instead of iterators
343  auto i = distance(begin(pool), bound.first);
344  auto last = distance(begin(pool), bound.second);
345  while (i != last) {
346  auto it = begin(pool) + i;
347  auto& time = get<1>(*it);
348  const auto& filename = get<2>(*it);
349  try {
350  File file(filename);
351  auto newTime = file.getModificationDate();
352  if (time == newTime) {
353  // When modification time is unchanged, assume
354  // sha1sum is also unchanged. So avoid
355  // expensive sha1sum calculation.
356  return file;
357  }
358  time = newTime; // update timestamp
359  needWrite = true;
360  auto newSum = calcSha1sum(file, reactor);
361  if (newSum == sha1sum) {
362  // Modification time was changed, but
363  // (recalculated) sha1sum is still the same.
364  return file;
365  }
366  // Sha1sum has changed: update sha1sum, move entry to
367  // new position new sum and continue searching.
368  if (adjust(it, newSum)) {
369  // after
370  --last; // no ++i
371  } else {
372  // before (or at)
373  ++i;
374  }
375  } catch (FileException&) {
376  // Error reading file: remove from db and continue
377  // searching.
378  remove(it);
379  --last;
380  }
381  }
382  return File(); // not found
383 }
384 
385 File FilePool::scanDirectory(
386  const Sha1Sum& sha1sum, const string& directory, const string& poolPath,
387  ScanProgress& progress)
388 {
389  ReadDir dir(directory);
390  while (dirent* d = dir.getEntry()) {
391  if (quit) {
392  // Scanning can take a long time. Allow to exit
393  // openmsx when it takes too long. Stop scanning
394  // by pretending we didn't find the file.
395  return File();
396  }
397  string file = d->d_name;
398  string path = directory + '/' + file;
400  if (FileOperations::getStat(path, st)) {
401  File result;
403  result = scanFile(sha1sum, path, st, poolPath, progress);
404  } else if (FileOperations::isDirectory(st)) {
405  if ((file != ".") && (file != "..")) {
406  result = scanDirectory(sha1sum, path, poolPath, progress);
407  }
408  }
409  if (result.is_open()) return result;
410  }
411  }
412  return File(); // not found
413 }
414 
415 File FilePool::scanFile(const Sha1Sum& sha1sum, const string& filename,
416  const FileOperations::Stat& st, const string& poolPath,
417  ScanProgress& progress)
418 {
419  ++progress.amountScanned;
420  // Periodically send a progress message with the current filename
421  auto now = Timer::getTime();
422  if (now > (progress.lastTime + 250000)) { // 4Hz
423  progress.lastTime = now;
424  reactor.getCliComm().printProgress("Searching for file with sha1sum " +
425  sha1sum.toString() + "...\nIndexing filepool " + poolPath +
426  ": [" + StringOp::toString(progress.amountScanned) + "]: " +
427  filename.substr(poolPath.size()));
428  }
429 
430  // deliverEvents() is relatively cheap when there are no events to
431  // deliver, so it's ok to call on each file.
433 
434  auto it = findInDatabase(filename);
435  if (it == end(pool)) {
436  // not in pool
437  try {
438  File file(filename);
439  auto sum = calcSha1sum(file, reactor);
440  auto time = FileOperations::getModificationDate(st);
441  insert(sum, time, filename);
442  if (sum == sha1sum) {
443  return file;
444  }
445  } catch (FileException&) {
446  // ignore
447  }
448  } else {
449  // already in pool
450  assert(filename == get<2>(*it));
451  try {
452  auto time = FileOperations::getModificationDate(st);
453  if (time == get<1>(*it)) {
454  // db is still up to date
455  if (get<0>(*it) == sha1sum) {
456  return File(filename);
457  }
458  } else {
459  // db outdated
460  File file(filename);
461  auto sum = calcSha1sum(file, reactor);
462  get<1>(*it) = time;
463  adjust(it, sum);
464  if (sum == sha1sum) {
465  return file;
466  }
467  }
468  } catch (FileException&) {
469  // error reading file, remove from db
470  remove(it);
471  }
472  }
473  return File(); // not found
474 }
475 
476 FilePool::Pool::iterator FilePool::findInDatabase(const string& filename)
477 {
478  // Linear search in pool for filename.
479  // Search from back to front because often, soon after this search, we
480  // will insert/remove an element from the vector. This requires
481  // shifting all elements in the vector starting from a certain
482  // position. Starting the search from the back increases the likelihood
483  // that the to-be-shifted elements are already in the memory cache.
484  for (auto it = pool.rbegin(); it != pool.rend(); ++it) {
485  if (get<2>(*it) == filename) {
486  return it.base() - 1;
487  }
488  }
489  return end(pool); // not found
490 }
491 
493 {
494  auto time = file.getModificationDate();
495  const auto& filename = file.getURL();
496 
497  auto it = findInDatabase(filename);
498  if ((it != end(pool)) && (get<1>(*it) == time)) {
499  // in database and modification time matches,
500  // assume sha1sum also matches
501  return get<0>(*it);
502  }
503 
504  // not in database or timestamp mismatch
505  auto sum = calcSha1sum(file, reactor);
506  if (it == end(pool)) {
507  // was not yet in database, insert new entry
508  insert(sum, time, filename);
509  } else {
510  // was already in database, but with wrong timestamp (and sha1sum)
511  get<1>(*it) = time;
512  adjust(it, sum);
513  }
514  return sum;
515 }
516 
517 int FilePool::signalEvent(const std::shared_ptr<const Event>& event)
518 {
519  (void)event; // avoid warning for non-assert compiles
520  assert(event->getType() == OPENMSX_QUIT_EVENT);
521  quit = true;
522  return 0;
523 }
524 
525 
526 // class Sha1SumCommand
527 
529  CommandController& commandController_, FilePool& filePool_)
530  : Command(commandController_, "sha1sum")
531  , filePool(filePool_)
532 {
533 }
534 
536 {
537  if (tokens.size() != 2) throw SyntaxError();
538  File file(tokens[1].getString());
539  result.setString(filePool.getSha1Sum(file).toString());
540 }
541 
542 string Sha1SumCommand::help(const vector<string>& /*tokens*/) const
543 {
544  return "Calculate sha1 value for the given file. If the file is "
545  "(g)zipped the sha1 is calculated on the unzipped version.";
546 }
547 
548 void Sha1SumCommand::tabCompletion(vector<string>& tokens) const
549 {
551 }
552 
553 } // namespace openmsx
Contains the main loop of openMSX.
Definition: Reactor.hh:61
bool isRegularFile(const Stat &st)
void update(const uint8_t *data, size_t len)
Incrementally calculate the hash value.
Definition: sha1.cc:220
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:167
const std::string getURL() const
Returns the URL of this file object.
Definition: File.cc:124
void registerEventListener(EventType type, EventListener &listener, Priority priority=OTHER)
Registers a given object to receive certain events.
string help(const vector< string > &tokens) const override
Print help for this command.
Definition: FilePool.cc:542
File getFile(FileType fileType, const Sha1Sum &sha1sum)
Search file with the given sha1sum.
Definition: FilePool.cc:265
void unregisterEventListener(EventType type, EventListener &listener)
Unregisters a previously registered event listener.
FileContext systemFileContext()
Definition: FileContext.cc:149
string toString(long long a)
Definition: StringOp.cc:150
void openofstream(std::ofstream &stream, const std::string &filename)
Open an ofstream in a platform-independent manner.
void printWarning(string_ref message)
Definition: CliComm.cc:20
uint8_t byte
8 bit unsigned integer
Definition: openmsx.hh:26
FilePool(CommandController &controler, Reactor &reactor)
Definition: FilePool.cc:68
string join(string_ref part1, string_ref part2)
Join two paths.
unsigned getListLength(Interpreter &interp) const
Definition: TclObject.cc:152
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
void execute(array_ref< TclObject > tokens, TclObject &result) override
Execute this command.
Definition: FilePool.cc:535
Sha1SumCommand(CommandController &commandController, FilePool &filePool)
Definition: FilePool.cc:528
T sum(const vecN< N, T > &x)
Definition: gl_vec.hh:289
EventDistributor & getEventDistributor()
Definition: Reactor.hh:76
void deliverEvents()
This actually delivers the events.
bool getStat(string_ref filename_, Stat &st)
Call stat() and return the stat structure.
static void completeFileName(std::vector< std::string > &tokens, const FileContext &context, const RANGE &extra)
Definition: Completer.hh:122
const std::string & getMessage() const
Definition: MSXException.hh:13
void attach(Observer< T > &observer)
Definition: Subject.hh:52
void repaint()
Redraw the display.
Definition: Display.cc:317
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:19
This class implements a subset of the proposal for std::array_ref (proposed for the next c++ standard...
Definition: array_ref.hh:19
Sha1Sum digest()
Get the final hash.
Definition: sha1.cc:257
const TclObject & getValue() const final override
Gets the current value of this setting as a TclObject.
Definition: Setting.hh:133
bool is_open() const
Return true iff this file handle refers to an open file.
Definition: File.hh:57
string getUserDataDir()
Get the openMSX data dir in the user's home directory.
time_t getModificationDate()
Get the date/time of last modification.
Definition: File.cc:145
std::string str() const
Definition: string_ref.cc:12
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
TclObject getListIndex(Interpreter &interp, unsigned index) const
Definition: TclObject.cc:170
std::string toString(time_t time)
Definition: Date.cc:152
Helper class to perform a sha1 calculation.
Definition: sha1.hh:71
std::iterator_traits< octet_iterator >::difference_type distance(octet_iterator first, octet_iterator last)
const std::string getOriginalName()
Get Original filename for this object.
Definition: File.cc:134
string_ref getString() const
Definition: TclObject.cc:139
size_t size() const
FileContext userFileContext(string_ref savePath)
Definition: FileContext.cc:161
size_type size() const
Definition: array_ref.hh:61
std::string toString() const
Definition: sha1.cc:131
void addListElement(string_ref element)
Definition: TclObject.cc:69
bool isDirectory(const Stat &st)
void setString(string_ref value)
Definition: TclObject.cc:14
void parse40(const char *str)
Definition: sha1.cc:114
const char *const FILE_CACHE
Definition: FilePool.cc:44
void printProgress(string_ref message)
Definition: CliComm.cc:30
void tabCompletion(vector< string > &tokens) const override
Attempt tab completion for this command.
Definition: FilePool.cc:548
uint8_t * data()
Sha1Sum getSha1Sum(File &file)
Calculate sha1sum for the given File object.
Definition: FilePool.cc:492
time_t fromString(const char *p)
Definition: Date.cc:31
Simple wrapper around openmdir() / readdir() / closedir() functions.
Definition: ReadDir.hh:15
const byte * mmap(size_t &size)
Map file in memory.
Definition: File.cc:89
void detach(Observer< T > &observer)
Definition: Subject.hh:58
string expandTilde(string_ref path)
Expand the '~' character to the user's home directory.
CliComm & getCliComm()
Definition: Reactor.cc:265
Interpreter & getInterpreter() const
Definition: Setting.cc:162
Display & getDisplay()
Definition: Reactor.hh:80
uint64_t getTime()
Get current (real) time in us.
Definition: Timer.cc:8
mat4 rotate(float angle, const vec3 &axis)
Definition: gl_transform.hh:56
string_ref::const_iterator begin(const string_ref &x)
Definition: string_ref.hh:166
struct dirent * getEntry()
Get directory entry for next file.
Definition: ReadDir.cc:17
time_t getModificationDate(const Stat &st)
Get the date/time of last modification.