openMSX
FilePool.cc
Go to the documentation of this file.
1 #include "FilePool.hh"
2 #include "File.hh"
3 #include "FileException.hh"
4 #include "FileContext.hh"
5 #include "FileOperations.hh"
6 #include "TclObject.hh"
7 #include "ReadDir.hh"
8 #include "Date.hh"
9 #include "CommandController.hh"
10 #include "CommandException.hh"
11 #include "Display.hh"
12 #include "EventDistributor.hh"
13 #include "CliComm.hh"
14 #include "Reactor.hh"
15 #include "Timer.hh"
16 #include "StringOp.hh"
17 #include "memory.hh"
18 #include "sha1.hh"
19 #include "stl.hh"
20 #include <fstream>
21 #include <cassert>
22 
23 using std::ifstream;
24 using std::get;
25 using std::make_tuple;
26 using std::ofstream;
27 using std::pair;
28 using std::string;
29 using std::vector;
30 using std::unique_ptr;
31 
32 namespace openmsx {
33 
34 class Sha1SumCommand final : public Command
35 {
36 public:
37  Sha1SumCommand(CommandController& commandController, FilePool& filePool);
38  void execute(array_ref<TclObject> tokens, TclObject& result) override;
39  string help(const vector<string>& tokens) const override;
40  void tabCompletion(vector<string>& tokens) const override;
41 private:
42  FilePool& filePool;
43 };
44 
45 
// Name of the sha1sum cache file. It is appended to the user data directory
// to obtain the full path that is read at startup and written at shutdown.
const char* const FILE_CACHE = "/.filecache";
47 
48 static string initialFilePoolSettingValue()
49 {
50  TclObject result;
51 
52  for (auto& p : systemFileContext().getPaths()) {
53  TclObject entry1;
54  entry1.addListElement("-path");
55  entry1.addListElement(FileOperations::join(p, "systemroms"));
56  entry1.addListElement("-types");
57  entry1.addListElement("system_rom");
58  result.addListElement(entry1);
59 
60  TclObject entry2;
61  entry2.addListElement("-path");
62  entry2.addListElement(FileOperations::join(p, "software"));
63  entry2.addListElement("-types");
64  entry2.addListElement("rom disk tape");
65  result.addListElement(entry2);
66  }
67  return result.getString().str();
68 }
69 
71  : filePoolSetting(
72  controller, "__filepool",
73  "This is an internal setting. Don't change this directly, "
74  "instead use the 'filepool' command.",
75  initialFilePoolSettingValue())
76  , reactor(reactor_)
77  , quit(false)
78 {
79  filePoolSetting.attach(*this);
81  readSha1sums();
82  needWrite = false;
83 
84  sha1SumCommand = make_unique<Sha1SumCommand>(controller, *this);
85 }
86 
88 {
89  if (needWrite) {
90  writeSha1sums();
91  }
93  filePoolSetting.detach(*this);
94 }
95 
96 void FilePool::insert(const Sha1Sum& sum, time_t time, const string& filename)
97 {
98  auto it = upper_bound(begin(pool), end(pool), sum,
100  pool.insert(it, make_tuple(sum, time, filename));
101  needWrite = true;
102 }
103 
104 void FilePool::remove(Pool::iterator it)
105 {
106  pool.erase(it);
107  needWrite = true;
108 }
109 
110 // Change the sha1sum of the element pointed to by 'it' into 'newSum'.
111 // Also re-arrange the items so that pool remains sorted on sha1sum. Internally
112 // this method doesn't actually sort, it merely rotates the elements.
113 // Returns false if the new position is before (or at) the old position.
114 // Returns true if the new position is after the old position.
115 bool FilePool::adjust(Pool::iterator it, const Sha1Sum& newSum)
116 {
117  needWrite = true;
118  auto newIt = upper_bound(begin(pool), end(pool), newSum,
120  get<0>(*it) = newSum; // update sum
121  if (newIt > it) {
122  // move to back
123  rotate(it, it + 1, newIt);
124  return true;
125  } else {
126  if (newIt < it) {
127  // move to front
128  rotate(newIt, it, it + 1);
129  } else {
130  // (unlikely) sha1sum has changed, but after
131  // resorting item would remain in the same
132  // position
133  }
134  return false;
135  }
136 }
137 
138 static bool parse(const string& line, Sha1Sum& sha1, time_t& time, string& filename)
139 {
140  if (line.size() <= 68) return false;
141 
142  try {
143  sha1.parse40(line.data());
144  } catch (MSXException& /*e*/) {
145  return false;
146  }
147 
148  time = Date::fromString(line.data() + 42);
149  if (time == time_t(-1)) return false;
150 
151  filename.assign(line, 68, line.size());
152  return true;
153 }
154 
155 void FilePool::readSha1sums()
156 {
157  assert(pool.empty());
158 
159  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
160  ifstream file(cacheFile.c_str());
161  string line;
162  Sha1Sum sum;
163  string filename;
164  time_t time;
165  while (file.good()) {
166  getline(file, line);
167  if (parse(line, sum, time, filename)) {
168  pool.emplace_back(sum, time, filename);
169  }
170  }
171 
172  if (!std::is_sorted(begin(pool), end(pool), LessTupleElement<0>())) {
173  // This should _rarely_ happen. In fact it should only happen
174  // when .filecache was manually edited. Though because it's
175  // very important that pool is indeed sorted I've added this
176  // safety mechanism.
177  sort(begin(pool), end(pool), LessTupleElement<0>());
178  }
179 }
180 
181 void FilePool::writeSha1sums()
182 {
183  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
184  ofstream file;
185  FileOperations::openofstream(file, cacheFile);
186  if (!file.is_open()) {
187  return;
188  }
189  for (auto& p : pool) {
190  file << get<0>(p).toString() << " " // sum
191  << Date::toString(get<1>(p)) << " " // date
192  << get<2>(p) // filename
193  << '\n';
194  }
195 }
196 
197 static int parseTypes(Interpreter& interp, const TclObject& list)
198 {
199  int result = 0;
200  unsigned num = list.getListLength(interp);
201  for (unsigned i = 0; i < num; ++i) {
202  string_ref elem = list.getListIndex(interp, i).getString();
203  if (elem == "system_rom") {
204  result |= FilePool::SYSTEM_ROM;
205  } else if (elem == "rom") {
206  result |= FilePool::ROM;
207  } else if (elem == "disk") {
208  result |= FilePool::DISK;
209  } else if (elem == "tape") {
210  result |= FilePool::TAPE;
211  } else {
212  throw CommandException("Unknown type: " + elem);
213  }
214  }
215  return result;
216 }
217 
218 void FilePool::update(const Setting& setting)
219 {
220  assert(&setting == &filePoolSetting); (void)setting;
221  getDirectories(); // check for syntax errors
222 }
223 
224 FilePool::Directories FilePool::getDirectories() const
225 {
226  Directories result;
227  auto& interp = filePoolSetting.getInterpreter();
228  const TclObject& all = filePoolSetting.getValue();
229  unsigned numLines = all.getListLength(interp);
230  for (unsigned i = 0; i < numLines; ++i) {
231  Entry entry;
232  bool hasPath = false;
233  entry.types = 0;
234  TclObject line = all.getListIndex(interp, i);
235  unsigned numItems = line.getListLength(interp);
236  if (numItems & 1) {
237  throw CommandException(
238  "Expected a list with an even number "
239  "of elements, but got " + line.getString());
240  }
241  for (unsigned j = 0; j < numItems; j += 2) {
242  string_ref name = line.getListIndex(interp, j + 0).getString();
243  TclObject value = line.getListIndex(interp, j + 1);
244  if (name == "-path") {
245  entry.path = value.getString().str();
246  hasPath = true;
247  } else if (name == "-types") {
248  entry.types = parseTypes(interp, value);
249  } else {
250  throw CommandException(
251  "Unknown item: " + name);
252  }
253  }
254  if (!hasPath) {
255  throw CommandException(
256  "Missing -path item: " + line.getString());
257  }
258  if (entry.types == 0) {
259  throw CommandException(
260  "Missing -types item: " + line.getString());
261  }
262  result.push_back(entry);
263  }
264  return result;
265 }
266 
267 File FilePool::getFile(FileType fileType, const Sha1Sum& sha1sum)
268 {
269  File result = getFromPool(sha1sum);
270  if (result.is_open()) return result;
271 
272  // not found in cache, need to scan directories
273  ScanProgress progress;
274  progress.lastTime = Timer::getTime();
275  progress.amountScanned = 0;
276 
277  Directories directories;
278  try {
279  directories = getDirectories();
280  } catch (CommandException& e) {
281  reactor.getCliComm().printWarning(
282  "Error while parsing '__filepool' setting" + e.getMessage());
283  }
284  for (auto& d : directories) {
285  if (d.types & fileType) {
286  string path = FileOperations::expandTilde(d.path);
287  result = scanDirectory(sha1sum, path, d.path, progress);
288  if (result.is_open()) return result;
289  }
290  }
291 
292  return result; // not found
293 }
294 
295 static void reportProgress(const string& filename, size_t percentage,
296  Reactor& reactor)
297 {
298  reactor.getCliComm().printProgress(
299  "Calculating SHA1 sum for " + filename + "... " + StringOp::toString(percentage) + '%');
300  reactor.getDisplay().repaint();
301 }
302 
303 static Sha1Sum calcSha1sum(File& file, Reactor& reactor)
304 {
305  // Calculate sha1 in several steps so that we can show progress
306  // information. We take a fixed step size for an efficient calculation.
307  static const size_t STEP_SIZE = 1024 * 1024; // 1MB
308 
309  size_t size;
310  const byte* data = file.mmap(size);
311  string filename = file.getOriginalName();
312 
313  SHA1 sha1;
314  size_t done = 0;
315  size_t remaining = size;
316  auto lastShowedProgress = Timer::getTime();
317  bool everShowedProgress = false;
318 
319  // Loop over all-but-the last blocks. For small files this loop is skipped.
320  while (remaining > STEP_SIZE) {
321  sha1.update(&data[done], STEP_SIZE);
322  done += STEP_SIZE;
323  remaining -= STEP_SIZE;
324 
325  auto now = Timer::getTime();
326  if ((now - lastShowedProgress) > 1000000) {
327  reportProgress(filename, (100 * done) / size, reactor);
328  lastShowedProgress = now;
329  everShowedProgress = true;
330  }
331  }
332  // last block
333  sha1.update(&data[done], remaining);
334  if (everShowedProgress) {
335  reportProgress(filename, 100, reactor);
336  }
337  return sha1.digest();
338 }
339 
340 File FilePool::getFromPool(const Sha1Sum& sha1sum)
341 {
342  auto bound = equal_range(begin(pool), end(pool), sha1sum,
344  // use indices instead of iterators
345  auto i = distance(begin(pool), bound.first);
346  auto last = distance(begin(pool), bound.second);
347  while (i != last) {
348  auto it = begin(pool) + i;
349  auto& time = get<1>(*it);
350  const auto& filename = get<2>(*it);
351  try {
352  File file(filename);
353  auto newTime = file.getModificationDate();
354  if (time == newTime) {
355  // When modification time is unchanged, assume
356  // sha1sum is also unchanged. So avoid
357  // expensive sha1sum calculation.
358  return file;
359  }
360  time = newTime; // update timestamp
361  needWrite = true;
362  auto newSum = calcSha1sum(file, reactor);
363  if (newSum == sha1sum) {
364  // Modification time was changed, but
365  // (recalculated) sha1sum is still the same.
366  return file;
367  }
368  // Sha1sum has changed: update sha1sum, move entry to
369  // new position new sum and continue searching.
370  if (adjust(it, newSum)) {
371  // after
372  --last; // no ++i
373  } else {
374  // before (or at)
375  ++i;
376  }
377  } catch (FileException&) {
378  // Error reading file: remove from db and continue
379  // searching.
380  remove(it);
381  --last;
382  }
383  }
384  return File(); // not found
385 }
386 
387 File FilePool::scanDirectory(
388  const Sha1Sum& sha1sum, const string& directory, const string& poolPath,
389  ScanProgress& progress)
390 {
391  ReadDir dir(directory);
392  while (dirent* d = dir.getEntry()) {
393  if (quit) {
394  // Scanning can take a long time. Allow to exit
395  // openmsx when it takes too long. Stop scanning
396  // by pretending we didn't find the file.
397  return File();
398  }
399  string file = d->d_name;
400  string path = directory + '/' + file;
402  if (FileOperations::getStat(path, st)) {
403  File result;
405  result = scanFile(sha1sum, path, st, poolPath, progress);
406  } else if (FileOperations::isDirectory(st)) {
407  if ((file != ".") && (file != "..")) {
408  result = scanDirectory(sha1sum, path, poolPath, progress);
409  }
410  }
411  if (result.is_open()) return result;
412  }
413  }
414  return File(); // not found
415 }
416 
417 File FilePool::scanFile(const Sha1Sum& sha1sum, const string& filename,
418  const FileOperations::Stat& st, const string& poolPath,
419  ScanProgress& progress)
420 {
421  ++progress.amountScanned;
422  // Periodically send a progress message with the current filename
423  auto now = Timer::getTime();
424  if (now > (progress.lastTime + 250000)) { // 4Hz
425  progress.lastTime = now;
426  reactor.getCliComm().printProgress("Searching for file with sha1sum " +
427  sha1sum.toString() + "...\nIndexing filepool " + poolPath +
428  ": [" + StringOp::toString(progress.amountScanned) + "]: " +
429  filename.substr(poolPath.size()));
430  }
431 
432  // deliverEvents() is relatively cheap when there are no events to
433  // deliver, so it's ok to call on each file.
435 
436  auto it = findInDatabase(filename);
437  if (it == end(pool)) {
438  // not in pool
439  try {
440  File file(filename);
441  auto sum = calcSha1sum(file, reactor);
442  auto time = FileOperations::getModificationDate(st);
443  insert(sum, time, filename);
444  if (sum == sha1sum) {
445  return file;
446  }
447  } catch (FileException&) {
448  // ignore
449  }
450  } else {
451  // already in pool
452  assert(filename == get<2>(*it));
453  try {
454  auto time = FileOperations::getModificationDate(st);
455  if (time == get<1>(*it)) {
456  // db is still up to date
457  if (get<0>(*it) == sha1sum) {
458  return File(filename);
459  }
460  } else {
461  // db outdated
462  File file(filename);
463  auto sum = calcSha1sum(file, reactor);
464  get<1>(*it) = time;
465  adjust(it, sum);
466  if (sum == sha1sum) {
467  return file;
468  }
469  }
470  } catch (FileException&) {
471  // error reading file, remove from db
472  remove(it);
473  }
474  }
475  return File(); // not found
476 }
477 
478 FilePool::Pool::iterator FilePool::findInDatabase(const string& filename)
479 {
480  // Linear search in pool for filename.
481  // Search from back to front because often, soon after this search, we
482  // will insert/remove an element from the vector. This requires
483  // shifting all elements in the vector starting from a certain
484  // position. Starting the search from the back increases the likelihood
485  // that the to-be-shifted elements are already in the memory cache.
486  for (auto it = pool.rbegin(); it != pool.rend(); ++it) {
487  if (get<2>(*it) == filename) {
488  return it.base() - 1;
489  }
490  }
491  return end(pool); // not found
492 }
493 
495 {
496  auto time = file.getModificationDate();
497  const auto& filename = file.getURL();
498 
499  auto it = findInDatabase(filename);
500  if ((it != end(pool)) && (get<1>(*it) == time)) {
501  // in database and modification time matches,
502  // assume sha1sum also matches
503  return get<0>(*it);
504  }
505 
506  // not in database or timestamp mismatch
507  auto sum = calcSha1sum(file, reactor);
508  if (it == end(pool)) {
509  // was not yet in database, insert new entry
510  insert(sum, time, filename);
511  } else {
512  // was already in database, but with wrong timestamp (and sha1sum)
513  get<1>(*it) = time;
514  adjust(it, sum);
515  }
516  return sum;
517 }
518 
519 int FilePool::signalEvent(const std::shared_ptr<const Event>& event)
520 {
521  (void)event; // avoid warning for non-assert compiles
522  assert(event->getType() == OPENMSX_QUIT_EVENT);
523  quit = true;
524  return 0;
525 }
526 
527 
528 // class Sha1SumCommand
529 
531  CommandController& commandController_, FilePool& filePool_)
532  : Command(commandController_, "sha1sum")
533  , filePool(filePool_)
534 {
535 }
536 
538 {
539  if (tokens.size() != 2) throw SyntaxError();
540  File file(tokens[1].getString());
541  result.setString(filePool.getSha1Sum(file).toString());
542 }
543 
544 string Sha1SumCommand::help(const vector<string>& /*tokens*/) const
545 {
546  return "Calculate sha1 value for the given file. If the file is "
547  "(g)zipped the sha1 is calculated on the unzipped version.";
548 }
549 
550 void Sha1SumCommand::tabCompletion(vector<string>& tokens) const
551 {
553 }
554 
555 } // namespace openmsx
Contains the main loop of openMSX.
Definition: Reactor.hh:61
bool isRegularFile(const Stat &st)
void update(const uint8_t *data, size_t len)
Incrementally calculate the hash value.
Definition: sha1.cc:222
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:167
size_type size() const
Definition: array_ref.hh:61
bool is_open() const
Return true iff this file handle refers to an open file.
Definition: File.hh:57
std::string str() const
Definition: string_ref.cc:12
void registerEventListener(EventType type, EventListener &listener, Priority priority=OTHER)
Registers a given object to receive certain events.
string_ref getString() const
Definition: TclObject.cc:139
string help(const vector< string > &tokens) const override
Print help for this command.
Definition: FilePool.cc:544
File getFile(FileType fileType, const Sha1Sum &sha1sum)
Search file with the given sha1sum.
Definition: FilePool.cc:267
void unregisterEventListener(EventType type, EventListener &listener)
Unregisters a previously registered event listener.
FileContext systemFileContext()
Definition: FileContext.cc:149
string toString(long long a)
Definition: StringOp.cc:156
void openofstream(std::ofstream &stream, const std::string &filename)
Open an ofstream in a platform-independent manner.
void printWarning(string_ref message)
Definition: CliComm.cc:28
uint8_t byte
8 bit unsigned integer
Definition: openmsx.hh:26
FilePool(CommandController &controler, Reactor &reactor)
Definition: FilePool.cc:70
string join(string_ref part1, string_ref part2)
Join two paths.
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
void execute(array_ref< TclObject > tokens, TclObject &result) override
Execute this command.
Definition: FilePool.cc:537
Sha1SumCommand(CommandController &commandController, FilePool &filePool)
Definition: FilePool.cc:530
T sum(const vecN< N, T > &x)
Definition: gl_vec.hh:289
EventDistributor & getEventDistributor()
Definition: Reactor.hh:76
void deliverEvents()
This actually delivers the events.
bool getStat(string_ref filename_, Stat &st)
Call stat() and return the stat structure.
static void completeFileName(std::vector< std::string > &tokens, const FileContext &context, const RANGE &extra)
Definition: Completer.hh:122
void attach(Observer< T > &observer)
Definition: Subject.hh:52
void repaint()
Redraw the display.
Definition: Display.cc:317
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:19
This class implements a subset of the proposal for std::array_ref (proposed for the next c++ standard...
Definition: array_ref.hh:19
Sha1Sum digest()
Get the final hash.
Definition: sha1.cc:259
TclObject getListIndex(Interpreter &interp, unsigned index) const
Definition: TclObject.cc:170
unsigned getListLength(Interpreter &interp) const
Definition: TclObject.cc:152
const std::string & getMessage() const
Definition: MSXException.hh:14
string getUserDataDir()
Get the openMSX data dir in the user's home directory.
time_t getModificationDate()
Get the date/time of last modification.
Definition: File.cc:149
Interpreter & getInterpreter() const
Definition: Setting.cc:162
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
Helper class to perform a sha1 calculation.
Definition: sha1.hh:71
std::iterator_traits< octet_iterator >::difference_type distance(octet_iterator first, octet_iterator last)
const std::string getOriginalName()
Get Original filename for this object.
Definition: File.cc:138
std::string toString() const
Definition: sha1.cc:131
FileContext userFileContext(string_ref savePath)
Definition: FileContext.cc:161
void addListElement(string_ref element)
Definition: TclObject.cc:69
bool isDirectory(const Stat &st)
void setString(string_ref value)
Definition: TclObject.cc:14
void parse40(const char *str)
Definition: sha1.cc:114
const char *const FILE_CACHE
Definition: FilePool.cc:46
void printProgress(string_ref message)
Definition: CliComm.cc:38
void tabCompletion(vector< string > &tokens) const override
Attempt tab completion for this command.
Definition: FilePool.cc:550
uint8_t * data()
Sha1Sum getSha1Sum(File &file)
Calculate sha1sum for the given File object.
Definition: FilePool.cc:494
const std::string getURL() const
Returns the URL of this file object.
Definition: File.cc:128
time_t fromString(const char *p)
Definition: Date.cc:31
const TclObject & getValue() const final override
Gets the current value of this setting as a TclObject.
Definition: Setting.hh:133
size_t size() const
Simple wrapper around opendir() / readdir() / closedir() functions.
Definition: ReadDir.hh:15
const byte * mmap(size_t &size)
Map file in memory.
Definition: File.cc:93
void detach(Observer< T > &observer)
Definition: Subject.hh:58
string expandTilde(string_ref path)
Expand the '~' character to the user's home directory.
CliComm & getCliComm()
Definition: Reactor.cc:265
Display & getDisplay()
Definition: Reactor.hh:80
uint64_t getTime()
Get current (real) time in us.
Definition: Timer.cc:8
mat4 rotate(float angle, const vec3 &axis)
Definition: gl_transform.hh:56
string_ref::const_iterator begin(const string_ref &x)
Definition: string_ref.hh:166
struct dirent * getEntry()
Get directory entry for next file.
Definition: ReadDir.cc:17
time_t getModificationDate(const Stat &st)
Get the date/time of last modification.