openMSX
RomDatabase.cc
Go to the documentation of this file.
1 #include "RomDatabase.hh"
2 #include "RomInfo.hh"
3 #include "FileContext.hh"
4 #include "File.hh"
5 #include "FileOperations.hh"
6 #include "CliComm.hh"
7 #include "MSXException.hh"
8 #include "StringOp.hh"
9 #include "String32.hh"
10 #include "hash_map.hh"
11 #include "ranges.hh"
12 #include "rapidsax.hh"
13 #include "unreachable.hh"
14 #include "stl.hh"
15 #include "view.hh"
16 #include "xxhash.hh"
17 #include <cassert>
18 #include <stdexcept>
19 
20 using std::string;
21 using std::string_view;
22 using std::vector;
23 
24 namespace openmsx {
25 
27 
29 {
30 public:
32  CliComm& cliComm_, char* bufStart_)
33  : db(db_)
34  , unknownTypes(unknownTypes_)
35  , cliComm(cliComm_)
36  , bufStart(bufStart_)
37  , state(BEGIN)
38  , unknownLevel(0)
39  , initialSize(db.size())
40  {
41  }
42 
43  // rapidsax handler interface
44  void start(string_view tag);
45  void attribute(string_view name, string_view value);
46  void text(string_view txt);
47  void stop();
48  void doctype(string_view txt);
49 
50  string_view getSystemID() const { return systemID; }
51 
52 private:
53  String32 cIndex(string_view str);
54  void addEntries();
55  void addAllEntries();
56 
57  enum State {
58  BEGIN,
59  SOFTWAREDB,
60  SOFTWARE,
61  SYSTEM,
62  TITLE,
63  COMPANY,
64  YEAR,
65  COUNTRY,
66  GENMSXID,
67  DUMP_REMARK,
68  DUMP_TEXT,
69  DUMP,
70  ORIGINAL,
71  ROM,
72  TYPE,
73  START,
74  HASH,
75  END
76  };
77 
78  struct Dump {
79  String32 remark;
80  Sha1Sum hash;
81  String32 origData;
82  RomType type;
83  bool origValue;
84  };
85 
87  UnknownTypes& unknownTypes;
88  CliComm& cliComm;
89  char* bufStart;
90 
91  string_view systemID;
92  string_view type;
93  string_view startVal;
94 
95  vector<Dump> dumps;
96  string_view system;
97  String32 title;
98  String32 company;
99  String32 year;
100  String32 country;
101  int genMSXid;
102 
103  State state;
104  unsigned unknownLevel;
105  size_t initialSize;
106 };
107 
108 void DBParser::start(string_view tag)
109 {
110  if (unknownLevel) {
111  ++unknownLevel;
112  return;
113  }
114 
115  assert(!tag.empty()); // rapidsax will reject empty tags
116  switch (state) {
117  case BEGIN:
118  if (tag == "softwaredb") {
119  state = SOFTWAREDB;
120  return;
121  }
122  throw MSXException("Expected <softwaredb> as root tag.");
123  case SOFTWAREDB:
124  if (small_compare<'s','o','f','t','w','a','r','e'>(tag)) {
125  system = string_view();
126  toString32(bufStart, bufStart, title);
127  toString32(bufStart, bufStart, company);
128  toString32(bufStart, bufStart, year);
129  toString32(bufStart, bufStart, country);
130  genMSXid = 0;
131  dumps.clear();
132  state = SOFTWARE;
133  return;
134  }
135  break;
136  case SOFTWARE: {
137  switch (tag.front()) {
138  case 's':
139  if (small_compare<'s','y','s','t','e','m'>(tag)) {
140  state = SYSTEM;
141  return;
142  }
143  break;
144  case 't':
145  tag.remove_prefix(1);
146  if (small_compare<'i','t','l','e'>(tag)) {
147  state = TITLE;
148  return;
149  }
150  break;
151  case 'c':
152  if (small_compare<'c','o','m','p','a','n','y'>(tag)) {
153  state = COMPANY;
154  return;
155  } else if (small_compare<'c','o','u','n','t','r','y'>(tag)) {
156  state = COUNTRY;
157  return;
158  }
159  break;
160  case 'y':
161  if (small_compare<'y','e','a','r'>(tag)) {
162  state = YEAR;
163  return;
164  }
165  break;
166  case 'g':
167  if (small_compare<'g','e','n','m','s','x','i','d'>(tag)) {
168  state = GENMSXID;
169  return;
170  }
171  break;
172  case 'd':
173  if (small_compare<'d','u','m','p'>(tag)) {
174  dumps.resize(dumps.size() + 1);
175  dumps.back().type = ROM_UNKNOWN;
176  dumps.back().origValue = false;
177  toString32(bufStart, bufStart, dumps.back().remark);
178  toString32(bufStart, bufStart, dumps.back().origData);
179  state = DUMP;
180  return;
181  }
182  break;
183  }
184  break;
185  }
186  case DUMP: {
187  switch (tag.front()) {
188  case 'o':
189  if (small_compare<'o','r','i','g','i','n','a','l'>(tag)) {
190  dumps.back().origValue = false;
191  state = ORIGINAL;
192  return;
193  }
194  break;
195  case 'm':
196  if (small_compare<'m','e','g','a','r','o','m'>(tag)) {
197  type = string_view();
198  startVal = string_view();
199  state = ROM;
200  return;
201  }
202  break;
203  case 'r':
204  tag.remove_prefix(1);
205  if (small_compare<'o','m'>(tag)) {
206  type = "Mirrored";
207  startVal = string_view();
208  state = ROM;
209  return;
210  }
211  break;
212  }
213  break;
214  }
215  case ROM: {
216  switch (tag.front()) {
217  case 't':
218  if (small_compare<'t','y','p','e'>(tag)) {
219  state = TYPE;
220  return;
221  }
222  break;
223  case 's':
224  tag.remove_prefix(1);
225  if (small_compare<'t','a','r','t'>(tag)) {
226  state = START;
227  return;
228  }
229  break;
230  case 'r':
231  if (small_compare<'r','e','m','a','r','k'>(tag)) {
232  state = DUMP_REMARK;
233  return;
234  }
235  break;
236  case 'h':
237  if (small_compare<'h','a','s','h'>(tag)) {
238  state = HASH;
239  return;
240  }
241  break;
242  }
243  break;
244  }
245  case DUMP_REMARK:
246  if (small_compare<'t','e','x','t'>(tag)) {
247  state = DUMP_TEXT;
248  return;
249  }
250  break;
251  case SYSTEM:
252  case TITLE:
253  case COMPANY:
254  case YEAR:
255  case COUNTRY:
256  case GENMSXID:
257  case ORIGINAL:
258  case TYPE:
259  case START:
260  case HASH:
261  case DUMP_TEXT:
262  break;
263 
264  case END:
265  throw MSXException("Unexpected opening tag: ", tag);
266 
267  default:
268  UNREACHABLE;
269  }
270 
271  ++unknownLevel;
272 }
273 
274 void DBParser::attribute(string_view name, string_view value)
275 {
276  if (unknownLevel) return;
277 
278  switch (state) {
279  case ORIGINAL:
280  if (small_compare<'v','a','l','u','e'>(name)) {
281  dumps.back().origValue = StringOp::stringToBool(value);
282  }
283  break;
284  case HASH:
285  case BEGIN:
286  case SOFTWAREDB:
287  case SOFTWARE:
288  case SYSTEM:
289  case TITLE:
290  case COMPANY:
291  case YEAR:
292  case COUNTRY:
293  case GENMSXID:
294  case DUMP_REMARK:
295  case DUMP_TEXT:
296  case DUMP:
297  case ROM:
298  case TYPE:
299  case START:
300  case END:
301  break;
302  default:
303  UNREACHABLE;
304  }
305 }
306 
307 void DBParser::text(string_view txt)
308 {
309  if (unknownLevel) return;
310 
311  switch (state) {
312  case SYSTEM:
313  system = txt;
314  break;
315  case TITLE:
316  title = cIndex(txt);
317  break;
318  case COMPANY:
319  company = cIndex(txt);
320  break;
321  case YEAR:
322  year = cIndex(txt);
323  break;
324  case COUNTRY:
325  country = cIndex(txt);
326  break;
327  case GENMSXID:
328  try {
329  genMSXid = StringOp::fast_stou(txt);
330  } catch (std::invalid_argument&) {
331  cliComm.printWarning(
332  "Ignoring bad Generation MSX id (genmsxid) "
333  "in entry with title '", title,
334  ": ", txt);
335  }
336  break;
337  case ORIGINAL:
338  dumps.back().origData = cIndex(txt);
339  break;
340  case TYPE:
341  type = txt;
342  break;
343  case START:
344  startVal = txt;
345  break;
346  case HASH:
347  dumps.back().hash = Sha1Sum(txt);
348  break;
349  case DUMP_REMARK:
350  case DUMP_TEXT:
351  dumps.back().remark = cIndex(txt);
352  break;
353  case BEGIN:
354  case SOFTWAREDB:
355  case SOFTWARE:
356  case DUMP:
357  case ROM:
358  case END:
359  break;
360  default:
361  UNREACHABLE;
362  }
363 }
364 
365 String32 DBParser::cIndex(string_view str)
366 {
367  auto* begin = const_cast<char*>(str.data());
368  auto* end = begin + str.size();
369  *end = 0;
370  String32 result;
371  toString32(bufStart, begin, result);
372  return result;
373 }
374 
375 // called on </software>
376 void DBParser::addEntries()
377 {
378  append(db, view::transform(dumps, [&](auto& d) {
379  return std::pair(d.hash,
380  RomInfo(title, year, company, country, d.origValue,
381  d.origData, d.remark, d.type, genMSXid));
382  }));
383 }
384 
385 // called on </softwaredb>
386 void DBParser::addAllEntries()
387 {
388  // Calculate boundary between old and new entries.
389  // old: [first, mid) already sorted, no duplicates
390  // new: [mid, last) not yet sorted, may have duplicates
391  // there may also be duplicates between old and new
392  const auto first = begin(db);
393  const auto last = end (db);
394  const auto mid = first + initialSize;
395  if (mid == last) return; // no new entries
396 
397  // Sort new entries, old entries are already sorted.
398  std::sort(mid, last, LessTupleElement<0>());
399 
400  // Filter duplicates from new entries. This is similar to the
401  // unique() algorithm, except that it also warns about duplicates.
402  auto it1 = mid;
403  auto it2 = mid + 1;
404  // skip initial non-duplicates
405  while (it2 != last) {
406  if (it1->first == it2->first) break;
407  ++it1; ++it2;
408  }
409  // move non-duplicates up
410  while (it2 != last) {
411  if (it1->first == it2->first) {
412  cliComm.printWarning(
413  "duplicate softwaredb entry SHA1: ",
414  it2->first.toString());
415  } else {
416  ++it1;
417  *it1 = std::move(*it2);
418  }
419  ++it2;
420  }
421  // actually erase the duplicates (typically none)
422  db.erase(it1 + 1, last);
423  // At this point both old and new entries are sorted and unique. But
424  // there may still be duplicates between old and new.
425 
426  // Merge new and old entries. This is similar to the inplace_merge()
427  // algorithm, except that duplicates (between old and new) are removed.
428  if (first == mid) return; // no old entries (common case)
429  RomDatabase::RomDB result;
430  result.reserve(db.size());
431  it1 = first;
432  it2 = mid;
433  // while both new and old still have elements
434  while (it1 != mid && it2 != last) {
435  if (it1->first < it2->first) {
436  result.push_back(std::move(*it1));
437  ++it1;
438  } else {
439  if (it1->first != it2->first) { // *it2 < *it1
440  result.push_back(std::move(*it2));
441  ++it2;
442  } else {
443  // pick old entry, silently ignore new
444  result.push_back(std::move(*it1));
445  ++it1; ++it2;
446  }
447  }
448  }
449  // move remaining old or new entries (one of these is empty)
450  move(it1, mid, back_inserter(result));
451  move(it2, last, back_inserter(result));
452 
453  // make result the new current database
454  swap(result, db);
455 }
456 
457 static const char* parseStart(string_view s)
458 {
459  // we expect "0x0000", "0x4000", "0x8000", "0xc000" or ""
460  return ((s.size() == 6) && StringOp::startsWith(s, "0x")) ? (s.data() + 2) : nullptr;
461 }
462 
464 {
465  if (unknownLevel) {
466  --unknownLevel;
467  return;
468  }
469 
470  switch (state) {
471  case SOFTWAREDB:
472  addAllEntries();
473  state = END;
474  break;
475  case SOFTWARE:
476  addEntries();
477  state = SOFTWAREDB;
478  break;
479  case SYSTEM:
480  case TITLE:
481  case COMPANY:
482  case YEAR:
483  case COUNTRY:
484  case GENMSXID:
485  state = SOFTWARE;
486  break;
487  case DUMP:
488  if (dumps.back().hash.empty()) {
489  // no sha1 sum specified, drop this dump
490  dumps.pop_back();
491  }
492  state = SOFTWARE;
493  break;
494  case ORIGINAL:
495  state = DUMP;
496  break;
497  case ROM: {
498  string_view t = type;
499  char buf[12];
500  if (small_compare<'M','i','r','r','o','r','e','d'>(t)) {
501  if (const char* s = parseStart(startVal)) {
502  memcpy(buf, t.data(), 8);
503  memcpy(buf + 8, s, 4);
504  t = string_view(buf, 12);
505  }
506  } else if (small_compare<'N','o','r','m','a','l'>(t)) {
507  if (const char* s = parseStart(startVal)) {
508  memcpy(buf, t.data(), 6);
509  memcpy(buf + 6, s, 4);
510  t = string_view(buf, 10);
511  }
512  }
513  RomType romType = RomInfo::nameToRomType(t);
514  if (romType == ROM_UNKNOWN) {
515  unknownTypes[string(t)]++;
516  }
517  dumps.back().type = romType;
518  state = DUMP;
519  break;
520  }
521  case TYPE:
522  case START:
523  case HASH:
524  case DUMP_REMARK:
525  state = ROM;
526  break;
527  case DUMP_TEXT:
528  state = DUMP_REMARK;
529  break;
530  case BEGIN:
531  case END:
532  throw MSXException("Unexpected closing tag");
533 
534  default:
535  UNREACHABLE;
536  }
537 }
538 
539 void DBParser::doctype(string_view txt)
540 {
541  auto pos1 = txt.find(" SYSTEM \"");
542  if (pos1 == string_view::npos) return;
543  auto t = txt.substr(pos1 + 9);
544  auto pos2 = t.find('"');
545  if (pos2 == string_view::npos) return;
546  systemID = t.substr(0, pos2);
547 }
548 
549 static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
550  RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
551 {
552  DBParser handler(db, unknownTypes, cliComm, bufStart);
553  rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
554 
555  if (handler.getSystemID() != "softwaredb1.dtd") {
556  throw rapidsax::ParseError(
557  "Missing or wrong systemID.\n"
558  "You're probably using an old incompatible file format.",
559  nullptr);
560  }
561 }
562 
564 {
565  db.reserve(3500);
566  UnknownTypes unknownTypes;
567  // first user- then system-directory
568  vector<string> paths = systemFileContext().getPaths();
569  vector<File> files;
570  size_t bufferSize = 0;
571  for (auto& p : paths) {
572  try {
573  auto& f = files.emplace_back(FileOperations::join(p, "softwaredb.xml"));
574  bufferSize += f.getSize() + rapidsax::EXTRA_BUFFER_SPACE;
575  } catch (MSXException& /*e*/) {
576  // Ignore. It's not unusual the DB in the user
577  // directory is not found. In case there's an error
578  // with both user and system DB, we must give a
579  // warning, but that's done below.
580  }
581  }
582  buffer.resize(bufferSize);
583  size_t bufferOffset = 0;
584  for (auto& file : files) {
585  try {
586  auto size = file.getSize();
587  auto* buf = &buffer[bufferOffset];
588  bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE;
589  file.read(buf, size);
590  buf[size] = 0;
591 
592  parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
593  } catch (rapidsax::ParseError& e) {
594  cliComm.printWarning(
595  "Rom database parsing failed: ", e.what());
596  } catch (MSXException& /*e*/) {
597  // Ignore, see above
598  }
599  }
600  if (bufferSize) buffer[0] = 0;
601  if (db.empty()) {
602  cliComm.printWarning(
603  "Couldn't load software database.\n"
604  "This may cause incorrect ROM mapper types to be used.");
605  }
606  if (!unknownTypes.empty()) {
607  string output = "Unknown mapper types in software database: ";
608  for (const auto& [type, count] : unknownTypes) {
609  strAppend(output, type, " (", count, "x); ");
610  }
611  cliComm.printWarning(output);
612  }
613 }
614 
615 const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
616 {
617  auto it = ranges::lower_bound(db, sha1sum, LessTupleElement<0>());
618  return ((it != end(db)) && (it->first == sha1sum))
619  ? &it->second : nullptr;
620 }
621 
622 } // namespace openmsx
auto transform(Range &&range, UnaryOp op)
Definition: view.hh:306
unsigned fast_stou(string_view s)
Definition: StringOp.cc:265
constexpr void sort(RAIt first, RAIt last, Compare cmp=Compare{})
Definition: cstd.hh:63
FileContext systemFileContext()
Definition: FileContext.cc:149
bool startsWith(string_view total, string_view part)
Definition: StringOp.cc:71
size_t size(std::string_view utf8)
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition: String32.hh:22
void toString32(const char *buffer, const char *str, uint32_t &result)
Definition: String32.hh:25
void text(string_view txt)
Definition: RomDatabase.cc:307
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:644
static RomType nameToRomType(std::string_view name)
Definition: RomInfo.cc:189
string_view getSystemID() const
Definition: RomDatabase.cc:50
void append(Result &)
Definition: stl.hh:340
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:19
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
Definition: lz4.cc:207
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
void printWarning(std::string_view message)
Definition: CliComm.cc:10
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
Definition: RomDatabase.cc:615
bool stringToBool(string_view str)
Definition: StringOp.cc:43
std::vector< std::string > getPaths() const
Definition: FileContext.cc:104
bool empty() const
Definition: hash_set.hh:520
RomDatabase(CliComm &cliComm)
Definition: RomDatabase.cc:563
auto lower_bound(ForwardRange &&range, const T &value)
Definition: ranges.hh:71
string join(string_view part1, string_view part2)
void attribute(string_view name, string_view value)
Definition: RomDatabase.cc:274
const char * what() const
Definition: rapidsax.hh:110
void doctype(string_view txt)
Definition: RomDatabase.cc:539
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
Definition: RomDatabase.cc:31
constexpr size_t EXTRA_BUFFER_SPACE
Definition: rapidsax.hh:40
TclObject t
void start(string_view tag)
Definition: RomDatabase.cc:108
std::vector< std::pair< Sha1Sum, RomInfo > > RomDB
Definition: RomDatabase.hh:17
#define UNREACHABLE
Definition: unreachable.hh:38