openMSX
RomDatabase.cc
Go to the documentation of this file.
1 #include "RomDatabase.hh"
2 #include "RomInfo.hh"
3 #include "FileContext.hh"
4 #include "File.hh"
5 #include "FileOperations.hh"
6 #include "CliComm.hh"
7 #include "MSXException.hh"
8 #include "StringOp.hh"
9 #include "String32.hh"
10 #include "hash_map.hh"
11 #include "ranges.hh"
12 #include "rapidsax.hh"
13 #include "unreachable.hh"
14 #include "stl.hh"
15 #include "view.hh"
16 #include "xxhash.hh"
17 #include <cassert>
18 #include <stdexcept>
19 
20 using std::string;
21 using std::vector;
22 
23 namespace openmsx {
24 
26 
28 {
29 public:
31  CliComm& cliComm_, char* bufStart_)
32  : db(db_)
33  , unknownTypes(unknownTypes_)
34  , cliComm(cliComm_)
35  , bufStart(bufStart_)
36  , state(BEGIN)
37  , unknownLevel(0)
38  , initialSize(db.size())
39  {
40  }
41 
42  // rapidsax handler interface
43  void start(string_view tag);
44  void attribute(string_view name, string_view value);
45  void text(string_view txt);
46  void stop();
47  void doctype(string_view txt);
48 
49  string_view getSystemID() const { return systemID; }
50 
51 private:
52  String32 cIndex(string_view str);
53  void addEntries();
54  void addAllEntries();
55 
56  enum State {
57  BEGIN,
58  SOFTWAREDB,
59  SOFTWARE,
60  SYSTEM,
61  TITLE,
62  COMPANY,
63  YEAR,
64  COUNTRY,
65  GENMSXID,
66  DUMP_REMARK,
67  DUMP_TEXT,
68  DUMP,
69  ORIGINAL,
70  ROM,
71  TYPE,
72  START,
73  HASH,
74  END
75  };
76 
77  struct Dump {
78  String32 remark;
79  Sha1Sum hash;
80  String32 origData;
81  RomType type;
82  bool origValue;
83  };
84 
86  UnknownTypes& unknownTypes;
87  CliComm& cliComm;
88  char* bufStart;
89 
90  string_view systemID;
91  string_view type;
92  string_view startVal;
93 
94  vector<Dump> dumps;
95  string_view system;
96  String32 title;
97  String32 company;
98  String32 year;
99  String32 country;
100  int genMSXid;
101 
102  State state;
103  unsigned unknownLevel;
104  size_t initialSize;
105 };
106 
108 {
109  if (unknownLevel) {
110  ++unknownLevel;
111  return;
112  }
113 
114  assert(!tag.empty()); // rapidsax will reject empty tags
115  switch (state) {
116  case BEGIN:
117  if (tag == "softwaredb") {
118  state = SOFTWAREDB;
119  return;
120  }
121  throw MSXException("Expected <softwaredb> as root tag.");
122  case SOFTWAREDB:
123  if (small_compare<'s','o','f','t','w','a','r','e'>(tag)) {
124  system.clear();
125  toString32(bufStart, bufStart, title);
126  toString32(bufStart, bufStart, company);
127  toString32(bufStart, bufStart, year);
128  toString32(bufStart, bufStart, country);
129  genMSXid = 0;
130  dumps.clear();
131  state = SOFTWARE;
132  return;
133  }
134  break;
135  case SOFTWARE: {
136  switch (tag.front()) {
137  case 's':
138  if (small_compare<'s','y','s','t','e','m'>(tag)) {
139  state = SYSTEM;
140  return;
141  }
142  break;
143  case 't':
144  tag.pop_front();
145  if (small_compare<'i','t','l','e'>(tag)) {
146  state = TITLE;
147  return;
148  }
149  break;
150  case 'c':
151  if (small_compare<'c','o','m','p','a','n','y'>(tag)) {
152  state = COMPANY;
153  return;
154  } else if (small_compare<'c','o','u','n','t','r','y'>(tag)) {
155  state = COUNTRY;
156  return;
157  }
158  break;
159  case 'y':
160  if (small_compare<'y','e','a','r'>(tag)) {
161  state = YEAR;
162  return;
163  }
164  break;
165  case 'g':
166  if (small_compare<'g','e','n','m','s','x','i','d'>(tag)) {
167  state = GENMSXID;
168  return;
169  }
170  break;
171  case 'd':
172  if (small_compare<'d','u','m','p'>(tag)) {
173  dumps.resize(dumps.size() + 1);
174  dumps.back().type = ROM_UNKNOWN;
175  dumps.back().origValue = false;
176  toString32(bufStart, bufStart, dumps.back().remark);
177  toString32(bufStart, bufStart, dumps.back().origData);
178  state = DUMP;
179  return;
180  }
181  break;
182  }
183  break;
184  }
185  case DUMP: {
186  switch (tag.front()) {
187  case 'o':
188  if (small_compare<'o','r','i','g','i','n','a','l'>(tag)) {
189  dumps.back().origValue = false;
190  state = ORIGINAL;
191  return;
192  }
193  break;
194  case 'm':
195  if (small_compare<'m','e','g','a','r','o','m'>(tag)) {
196  type.clear();
197  startVal.clear();
198  state = ROM;
199  return;
200  }
201  break;
202  case 'r':
203  tag.pop_front();
204  if (small_compare<'o','m'>(tag)) {
205  type = "Mirrored";
206  startVal.clear();
207  state = ROM;
208  return;
209  }
210  break;
211  }
212  break;
213  }
214  case ROM: {
215  switch (tag.front()) {
216  case 't':
217  if (small_compare<'t','y','p','e'>(tag)) {
218  state = TYPE;
219  return;
220  }
221  break;
222  case 's':
223  tag.pop_front();
224  if (small_compare<'t','a','r','t'>(tag)) {
225  state = START;
226  return;
227  }
228  break;
229  case 'r':
230  if (small_compare<'r','e','m','a','r','k'>(tag)) {
231  state = DUMP_REMARK;
232  return;
233  }
234  break;
235  case 'h':
236  if (small_compare<'h','a','s','h'>(tag)) {
237  state = HASH;
238  return;
239  }
240  break;
241  }
242  break;
243  }
244  case DUMP_REMARK:
245  if (small_compare<'t','e','x','t'>(tag)) {
246  state = DUMP_TEXT;
247  return;
248  }
249  break;
250  case SYSTEM:
251  case TITLE:
252  case COMPANY:
253  case YEAR:
254  case COUNTRY:
255  case GENMSXID:
256  case ORIGINAL:
257  case TYPE:
258  case START:
259  case HASH:
260  case DUMP_TEXT:
261  break;
262 
263  case END:
264  throw MSXException("Unexpected opening tag: ", tag);
265 
266  default:
267  UNREACHABLE;
268  }
269 
270  ++unknownLevel;
271 }
272 
274 {
275  if (unknownLevel) return;
276 
277  switch (state) {
278  case ORIGINAL:
279  if (small_compare<'v','a','l','u','e'>(name)) {
280  dumps.back().origValue = StringOp::stringToBool(value);
281  }
282  break;
283  case HASH:
284  case BEGIN:
285  case SOFTWAREDB:
286  case SOFTWARE:
287  case SYSTEM:
288  case TITLE:
289  case COMPANY:
290  case YEAR:
291  case COUNTRY:
292  case GENMSXID:
293  case DUMP_REMARK:
294  case DUMP_TEXT:
295  case DUMP:
296  case ROM:
297  case TYPE:
298  case START:
299  case END:
300  break;
301  default:
302  UNREACHABLE;
303  }
304 }
305 
307 {
308  if (unknownLevel) return;
309 
310  switch (state) {
311  case SYSTEM:
312  system = txt;
313  break;
314  case TITLE:
315  title = cIndex(txt);
316  break;
317  case COMPANY:
318  company = cIndex(txt);
319  break;
320  case YEAR:
321  year = cIndex(txt);
322  break;
323  case COUNTRY:
324  country = cIndex(txt);
325  break;
326  case GENMSXID:
327  try {
328  genMSXid = fast_stou(txt);
329  } catch (std::invalid_argument&) {
330  cliComm.printWarning(
331  "Ignoring bad Generation MSX id (genmsxid) "
332  "in entry with title '", title,
333  ": ", txt);
334  }
335  break;
336  case ORIGINAL:
337  dumps.back().origData = cIndex(txt);
338  break;
339  case TYPE:
340  type = txt;
341  break;
342  case START:
343  startVal = txt;
344  break;
345  case HASH:
346  dumps.back().hash = Sha1Sum(txt);
347  break;
348  case DUMP_REMARK:
349  case DUMP_TEXT:
350  dumps.back().remark = cIndex(txt);
351  break;
352  case BEGIN:
353  case SOFTWAREDB:
354  case SOFTWARE:
355  case DUMP:
356  case ROM:
357  case END:
358  break;
359  default:
360  UNREACHABLE;
361  }
362 }
363 
364 String32 DBParser::cIndex(string_view str)
365 {
366  auto* begin = const_cast<char*>(str.data());
367  auto* end = begin + str.size();
368  *end = 0;
369  String32 result;
370  toString32(bufStart, begin, result);
371  return result;
372 }
373 
374 // called on </software>
375 void DBParser::addEntries()
376 {
377  append(db, view::transform(dumps, [&](auto& d) {
378  return std::make_pair(
379  d.hash,
380  RomInfo(title, year, company, country, d.origValue,
381  d.origData, d.remark, d.type, genMSXid));
382  }));
383 }
384 
385 // called on </softwaredb>
386 void DBParser::addAllEntries()
387 {
388  // Calculate boundary between old and new entries.
389  // old: [first, mid) already sorted, no duplicates
390  // new: [mid, last) not yet sorted, may have duplicates
391  // there may also be duplicates between old and new
392  const auto first = begin(db);
393  const auto last = end (db);
394  const auto mid = first + initialSize;
395  if (mid == last) return; // no new entries
396 
397  // Sort new entries, old entries are already sorted.
398  std::sort(mid, last, LessTupleElement<0>());
399 
400  // Filter duplicates from new entries. This is similar to the
401  // unique() algorithm, except that it also warns about duplicates.
402  auto it1 = mid;
403  auto it2 = mid + 1;
404  // skip initial non-duplicates
405  while (it2 != last) {
406  if (it1->first == it2->first) break;
407  ++it1; ++it2;
408  }
409  // move non-duplicates up
410  while (it2 != last) {
411  if (it1->first == it2->first) {
412  cliComm.printWarning(
413  "duplicate softwaredb entry SHA1: ",
414  it2->first.toString());
415  } else {
416  ++it1;
417  *it1 = std::move(*it2);
418  }
419  ++it2;
420  }
421  // actually erase the duplicates (typically none)
422  db.erase(it1 + 1, last);
423  // At this point both old and new entries are sorted and unique. But
424  // there may still be duplicates between old and new.
425 
426  // Merge new and old entries. This is similar to the inplace_merge()
427  // algorithm, except that duplicates (between old and new) are removed.
428  if (first == mid) return; // no old entries (common case)
429  RomDatabase::RomDB result;
430  result.reserve(db.size());
431  it1 = first;
432  it2 = mid;
433  // while both new and old still have elements
434  while (it1 != mid && it2 != last) {
435  if (it1->first < it2->first) {
436  result.push_back(std::move(*it1));
437  ++it1;
438  } else {
439  if (it1->first != it2->first) { // *it2 < *it1
440  result.push_back(std::move(*it2));
441  ++it2;
442  } else {
443  // pick old entry, silently ignore new
444  result.push_back(std::move(*it1));
445  ++it1; ++it2;
446  }
447  }
448  }
449  // move remaining old or new entries (one of these is empty)
450  move(it1, mid, back_inserter(result));
451  move(it2, last, back_inserter(result));
452 
453  // make result the new current database
454  swap(result, db);
455 }
456 
457 static const char* parseStart(string_view s)
458 {
459  // we expect "0x0000", "0x4000", "0x8000", "0xc000" or ""
460  return ((s.size() == 6) && s.starts_with("0x")) ? (s.data() + 2) : nullptr;
461 }
462 
464 {
465  if (unknownLevel) {
466  --unknownLevel;
467  return;
468  }
469 
470  switch (state) {
471  case SOFTWAREDB:
472  addAllEntries();
473  state = END;
474  break;
475  case SOFTWARE:
476  addEntries();
477  state = SOFTWAREDB;
478  break;
479  case SYSTEM:
480  case TITLE:
481  case COMPANY:
482  case YEAR:
483  case COUNTRY:
484  case GENMSXID:
485  state = SOFTWARE;
486  break;
487  case DUMP:
488  if (dumps.back().hash.empty()) {
489  // no sha1 sum specified, drop this dump
490  dumps.pop_back();
491  }
492  state = SOFTWARE;
493  break;
494  case ORIGINAL:
495  state = DUMP;
496  break;
497  case ROM: {
498  string_view t = type;
499  char buf[12];
500  if (small_compare<'M','i','r','r','o','r','e','d'>(t)) {
501  if (const char* s = parseStart(startVal)) {
502  memcpy(buf, t.data(), 8);
503  memcpy(buf + 8, s, 4);
504  t = string_view(buf, 12);
505  }
506  } else if (small_compare<'N','o','r','m','a','l'>(t)) {
507  if (const char* s = parseStart(startVal)) {
508  memcpy(buf, t.data(), 6);
509  memcpy(buf + 6, s, 4);
510  t = string_view(buf, 10);
511  }
512  }
513  RomType romType = RomInfo::nameToRomType(t);
514  if (romType == ROM_UNKNOWN) {
515  unknownTypes[t.str()]++;
516  }
517  dumps.back().type = romType;
518  state = DUMP;
519  break;
520  }
521  case TYPE:
522  case START:
523  case HASH:
524  case DUMP_REMARK:
525  state = ROM;
526  break;
527  case DUMP_TEXT:
528  state = DUMP_REMARK;
529  break;
530  case BEGIN:
531  case END:
532  throw MSXException("Unexpected closing tag");
533 
534  default:
535  UNREACHABLE;
536  }
537 }
538 
540 {
541  auto pos1 = txt.find(" SYSTEM \"");
542  if (pos1 == string_view::npos) return;
543  auto t = txt.substr(pos1 + 9);
544  auto pos2 = t.find('"');
545  if (pos2 == string_view::npos) return;
546  systemID = t.substr(0, pos2);
547 }
548 
549 static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
550  RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
551 {
552  DBParser handler(db, unknownTypes, cliComm, bufStart);
553  rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
554 
555  if (handler.getSystemID() != "softwaredb1.dtd") {
556  throw rapidsax::ParseError(
557  "Missing or wrong systemID.\n"
558  "You're probably using an old incompatible file format.",
559  nullptr);
560  }
561 }
562 
564 {
565  db.reserve(3500);
566  UnknownTypes unknownTypes;
567  // first user- then system-directory
568  vector<string> paths = systemFileContext().getPaths();
569  vector<File> files;
570  size_t bufferSize = 0;
571  for (auto& p : paths) {
572  try {
573  files.emplace_back(FileOperations::join(p, "softwaredb.xml"));
574  bufferSize += files.back().getSize() + rapidsax::EXTRA_BUFFER_SPACE;
575  } catch (MSXException& /*e*/) {
576  // Ignore. It's not unusual the DB in the user
577  // directory is not found. In case there's an error
578  // with both user and system DB, we must give a
579  // warning, but that's done below.
580  }
581  }
582  buffer.resize(bufferSize);
583  size_t bufferOffset = 0;
584  for (auto& file : files) {
585  try {
586  auto size = file.getSize();
587  auto* buf = &buffer[bufferOffset];
588  bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE;
589  file.read(buf, size);
590  buf[size] = 0;
591 
592  parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
593  } catch (rapidsax::ParseError& e) {
594  cliComm.printWarning(
595  "Rom database parsing failed: ", e.what());
596  } catch (MSXException& /*e*/) {
597  // Ignore, see above
598  }
599  }
600  if (bufferSize) buffer[0] = 0;
601  if (db.empty()) {
602  cliComm.printWarning(
603  "Couldn't load software database.\n"
604  "This may cause incorrect ROM mapper types to be used.");
605  }
606  if (!unknownTypes.empty()) {
607  string output = "Unknown mapper types in software database: ";
608  for (auto& p : unknownTypes) {
609  strAppend(output, p.first, " (", p.second, "x); ");
610  }
611  cliComm.printWarning(output);
612  }
613 }
614 
615 const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
616 {
617  auto it = ranges::lower_bound(db, sha1sum, LessTupleElement<0>());
618  return ((it != end(db)) && (it->first == sha1sum))
619  ? &it->second : nullptr;
620 }
621 
622 } // namespace openmsx
const char * data() const
Definition: string_view.hh:57
auto transform(Range &&range, UnaryOp op)
Definition: view.hh:312
string join(string_view part1, string_view part2)
Join two paths.
bool starts_with(string_view x) const
Definition: string_view.cc:116
void swap(optional< T > &x, optional< T > &y) noexcept(noexcept(x.swap(y)))
Definition: optional.hh:816
constexpr void sort(RAIt first, RAIt last, Compare cmp=Compare{})
Definition: cstd.hh:104
FileContext systemFileContext()
Definition: FileContext.cc:148
void pop_front()
Definition: string_view.hh:75
char back() const
Definition: string_view.hh:56
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition: String32.hh:22
void toString32(const char *buffer, const char *str, uint32_t &result)
Definition: String32.hh:25
void text(string_view txt)
Definition: RomDatabase.cc:306
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:648
static const size_type npos
Definition: string_view.hh:24
auto begin(const string_view &x)
Definition: string_view.hh:151
string_view getSystemID() const
Definition: RomDatabase.cc:49
char front() const
Definition: string_view.hh:55
void append(Result &)
Definition: stl.hh:354
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:19
size_type find(string_view s) const
Definition: string_view.cc:38
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
Definition: RomDatabase.cc:615
bool stringToBool(string_view str)
Definition: StringOp.cc:41
bool empty() const
Definition: string_view.hh:45
This class implements a (close approximation) of the std::string_view class.
Definition: string_view.hh:16
std::vector< std::string > getPaths() const
Definition: FileContext.cc:103
bool empty() const
Definition: hash_set.hh:539
std::string str() const
Definition: string_view.cc:12
RomDatabase(CliComm &cliComm)
Definition: RomDatabase.cc:563
auto lower_bound(ForwardRange &&range, const T &value)
Definition: ranges.hh:71
string_view substr(size_type pos, size_type n=npos) const
Definition: string_view.cc:32
size_type size() const
Definition: string_view.hh:44
static RomType nameToRomType(string_view name)
Definition: RomInfo.cc:188
void attribute(string_view name, string_view value)
Definition: RomDatabase.cc:273
const char * what() const
Definition: rapidsax.hh:110
void doctype(string_view txt)
Definition: RomDatabase.cc:539
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
Definition: RomDatabase.cc:30
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.hh:62
unsigned fast_stou(string_view s)
Definition: string_view.cc:145
TclObject t
void start(string_view tag)
Definition: RomDatabase.cc:107
void printWarning(string_view message)
Definition: CliComm.cc:20
std::vector< std::pair< Sha1Sum, RomInfo > > RomDB
Definition: RomDatabase.hh:17
auto end(const string_view &x)
Definition: string_view.hh:152
#define UNREACHABLE
Definition: unreachable.hh:38