openMSX
RomDatabase.cc
Go to the documentation of this file.
1 #include "RomDatabase.hh"
2 #include "FileContext.hh"
3 #include "File.hh"
4 #include "CliComm.hh"
5 #include "MSXException.hh"
6 #include "StringOp.hh"
7 #include "String32.hh"
8 #include "hash_map.hh"
9 #include "ranges.hh"
10 #include "rapidsax.hh"
11 #include "unreachable.hh"
12 #include "stl.hh"
13 #include "view.hh"
14 #include "xxhash.hh"
15 #include <cassert>
16 
17 using std::string_view;
18 
19 namespace openmsx {
20 
22 
24 {
25 public:
27  CliComm& cliComm_, char* bufStart_)
28  : db(db_)
29  , unknownTypes(unknownTypes_)
30  , cliComm(cliComm_)
31  , bufStart(bufStart_)
32  , state(BEGIN)
33  , unknownLevel(0)
34  , initialSize(db.size())
35  {
36  }
37 
38  // rapidsax handler interface
39  void start(string_view tag);
40  void attribute(string_view name, string_view value);
41  void text(string_view txt);
42  void stop();
43  void doctype(string_view txt);
44 
45  [[nodiscard]] string_view getSystemID() const { return systemID; }
46 
47 private:
48  [[nodiscard]] String32 cIndex(string_view str);
49  void addEntries();
50  void addAllEntries();
51 
// Parser state: identifies the XML element whose content is currently
// being processed. start() moves to a deeper state on a recognized opening
// tag, stop() moves back to the enclosing state on the closing tag.
52  enum State {
53  BEGIN, // nothing seen yet, <softwaredb> is expected as root tag
54  SOFTWAREDB, // inside <softwaredb>
55  SOFTWARE, // inside <software>
56  SYSTEM, // inside <software><system>
57  TITLE, // inside <software><title>
58  COMPANY, // inside <software><company>
59  YEAR, // inside <software><year>
60  COUNTRY, // inside <software><country>
61  GENMSXID, // inside <software><genmsxid>
62  DUMP_REMARK, // inside <remark> (child of <rom>/<megarom>)
63  DUMP_TEXT, // inside <remark><text>
64  DUMP, // inside <software><dump>
65  ORIGINAL, // inside <dump><original>
66  ROM, // inside <dump><rom> or <dump><megarom>
67  TYPE, // inside <type> (child of <rom>/<megarom>)
68  START, // inside <start> (child of <rom>/<megarom>)
69  HASH, // inside <hash> (child of <rom>/<megarom>)
70  END // root tag has been closed, no further tags are allowed
71  };
72 
// Data collected for a single <dump> element of the current <software>.
73  struct Dump {
74  String32 remark; // text of <remark> / <remark><text>
75  Sha1Sum hash; // parsed content of <hash>
76  String32 origData; // text content of <original>
77  RomType type; // mapper type, resolved when <rom>/<megarom> is closed
78  bool origValue; // 'value' attribute of <original>
79  };
80 
82  UnknownTypes& unknownTypes;
83  CliComm& cliComm;
84  char* bufStart;
85 
86  string_view systemID;
87  string_view type;
88  string_view startVal;
89 
90  std::vector<Dump> dumps;
91  string_view system;
92  String32 title;
93  String32 company;
94  String32 year;
95  String32 country;
96  int genMSXid;
97 
98  State state;
99  unsigned unknownLevel;
100  size_t initialSize;
101 };
102 
// rapidsax handler callback: an opening tag was encountered.
//
// Recognized tags advance the state machine and return early. Any tag that
// is not recognized in the current state falls through to the bottom of the
// function, where 'unknownLevel' is incremented; while 'unknownLevel' is
// non-zero only the nesting depth is tracked.
//
// Throws MSXException when the root tag is not <softwaredb> or when a tag
// is opened after the root tag was closed.
103 void DBParser::start(string_view tag)
104 {
105  if (unknownLevel) {
106  ++unknownLevel; // nested inside an unknown element: only track depth
107  return;
108  }
109 
110  assert(!tag.empty()); // rapidsax will reject empty tags
111  switch (state) {
112  case BEGIN:
113  if (tag == "softwaredb") {
114  state = SOFTWAREDB;
115  return;
116  }
117  throw MSXException("Expected <softwaredb> as root tag.");
118  case SOFTWAREDB:
119  if (small_compare<'s','o','f','t','w','a','r','e'>(tag)) {
120  // Reset all per-<software> fields.
121  // NOTE(review): toString32(bufStart, bufStart, x) presumably makes x
122  // refer to the empty string at the start of the buffer -- confirm.
119  system = string_view();
121  toString32(bufStart, bufStart, title);
122  toString32(bufStart, bufStart, company);
123  toString32(bufStart, bufStart, year);
124  toString32(bufStart, bufStart, country);
125  genMSXid = 0;
126  dumps.clear();
127  state = SOFTWARE;
128  return;
129  }
130  break;
131  case SOFTWARE: {
132  switch (tag.front()) { // dispatch on the first character
133  case 's':
134  if (small_compare<'s','y','s','t','e','m'>(tag)) {
135  state = SYSTEM;
136  return;
137  }
138  break;
139  case 't':
140  tag.remove_prefix(1); // 't' already matched, compare the remainder
141  if (small_compare<'i','t','l','e'>(tag)) {
142  state = TITLE;
143  return;
144  }
145  break;
146  case 'c':
147  if (small_compare<'c','o','m','p','a','n','y'>(tag)) {
148  state = COMPANY;
149  return;
150  } else if (small_compare<'c','o','u','n','t','r','y'>(tag)) {
151  state = COUNTRY;
152  return;
153  }
154  break;
155  case 'y':
156  if (small_compare<'y','e','a','r'>(tag)) {
157  state = YEAR;
158  return;
159  }
160  break;
161  case 'g':
162  if (small_compare<'g','e','n','m','s','x','i','d'>(tag)) {
163  state = GENMSXID;
164  return;
165  }
166  break;
167  case 'd':
168  if (small_compare<'d','u','m','p'>(tag)) {
169  dumps.resize(dumps.size() + 1); // start a new dump with default values
170  dumps.back().type = ROM_UNKNOWN;
171  dumps.back().origValue = false;
172  toString32(bufStart, bufStart, dumps.back().remark);
173  toString32(bufStart, bufStart, dumps.back().origData);
174  state = DUMP;
175  return;
176  }
177  break;
178  }
179  break;
180  }
181  case DUMP: {
182  switch (tag.front()) {
183  case 'o':
184  if (small_compare<'o','r','i','g','i','n','a','l'>(tag)) {
185  dumps.back().origValue = false; // may be overruled by the 'value' attribute
186  state = ORIGINAL;
187  return;
188  }
189  break;
190  case 'm':
191  if (small_compare<'m','e','g','a','r','o','m'>(tag)) {
192  type = string_view(); // <megarom> has no default type
193  startVal = string_view();
194  state = ROM;
195  return;
196  }
197  break;
198  case 'r':
199  tag.remove_prefix(1); // 'r' already matched, compare the remainder
200  if (small_compare<'o','m'>(tag)) {
201  type = "Mirrored"; // default type for <rom>, <type> may replace it
202  startVal = string_view();
203  state = ROM;
204  return;
205  }
206  break;
207  }
208  break;
209  }
210  case ROM: {
211  switch (tag.front()) {
212  case 't':
213  if (small_compare<'t','y','p','e'>(tag)) {
214  state = TYPE;
215  return;
216  }
217  break;
218  case 's':
219  tag.remove_prefix(1); // 's' already matched, compare the remainder
220  if (small_compare<'t','a','r','t'>(tag)) {
221  state = START;
222  return;
223  }
224  break;
225  case 'r':
226  if (small_compare<'r','e','m','a','r','k'>(tag)) {
227  state = DUMP_REMARK;
228  return;
229  }
230  break;
231  case 'h':
232  if (small_compare<'h','a','s','h'>(tag)) {
233  state = HASH;
234  return;
235  }
236  break;
237  }
238  break;
239  }
240  case DUMP_REMARK:
241  if (small_compare<'t','e','x','t'>(tag)) {
242  state = DUMP_TEXT;
243  return;
244  }
245  break;
246  case SYSTEM: // these elements have no known child elements
247  case TITLE:
248  case COMPANY:
249  case YEAR:
250  case COUNTRY:
251  case GENMSXID:
252  case ORIGINAL:
253  case TYPE:
254  case START:
255  case HASH:
256  case DUMP_TEXT:
257  break;
258 
259  case END:
260  throw MSXException("Unexpected opening tag: ", tag);
261 
262  default:
263  UNREACHABLE;
264  }
265 
266  ++unknownLevel; // unrecognized tag: skip it and everything nested in it
267 }
268 
269 void DBParser::attribute(string_view name, string_view value)
270 {
271  if (unknownLevel) return;
272 
273  switch (state) {
274  case ORIGINAL:
275  if (small_compare<'v','a','l','u','e'>(name)) {
276  dumps.back().origValue = StringOp::stringToBool(value);
277  }
278  break;
279  case HASH:
280  case BEGIN:
281  case SOFTWAREDB:
282  case SOFTWARE:
283  case SYSTEM:
284  case TITLE:
285  case COMPANY:
286  case YEAR:
287  case COUNTRY:
288  case GENMSXID:
289  case DUMP_REMARK:
290  case DUMP_TEXT:
291  case DUMP:
292  case ROM:
293  case TYPE:
294  case START:
295  case END:
296  break;
297  default:
298  UNREACHABLE;
299  }
300 }
301 
302 void DBParser::text(string_view txt)
303 {
304  if (unknownLevel) return;
305 
306  switch (state) {
307  case SYSTEM:
308  system = txt;
309  break;
310  case TITLE:
311  title = cIndex(txt);
312  break;
313  case COMPANY:
314  company = cIndex(txt);
315  break;
316  case YEAR:
317  year = cIndex(txt);
318  break;
319  case COUNTRY:
320  country = cIndex(txt);
321  break;
322  case GENMSXID: {
323  auto g = StringOp::stringToBase<10, unsigned>(txt);
324  if (!g) {
325  cliComm.printWarning(
326  "Ignoring bad Generation MSX id (genmsxid) "
327  "in entry with title '", fromString32(bufStart, title),
328  ": ", txt);
329  }
330  genMSXid = *g;
331  break;
332  }
333  case ORIGINAL:
334  dumps.back().origData = cIndex(txt);
335  break;
336  case TYPE:
337  type = txt;
338  break;
339  case START:
340  startVal = txt;
341  break;
342  case HASH:
343  try {
344  dumps.back().hash = Sha1Sum(txt);
345  } catch (MSXException& e) {
346  cliComm.printWarning(
347  "Ignoring bad dump for '", fromString32(bufStart, title),
348  "': ", e.getMessage());
349  }
350  break;
351  case DUMP_REMARK:
352  case DUMP_TEXT:
353  dumps.back().remark = cIndex(txt);
354  break;
355  case BEGIN:
356  case SOFTWAREDB:
357  case SOFTWARE:
358  case DUMP:
359  case ROM:
360  case END:
361  break;
362  default:
363  UNREACHABLE;
364  }
365 }
366 
367 String32 DBParser::cIndex(string_view str)
368 {
369  auto* begin = const_cast<char*>(str.data());
370  auto* end = begin + str.size();
371  *end = 0;
372  String32 result;
373  toString32(bufStart, begin, result);
374  return result;
375 }
376 
377 // called on </software>
378 void DBParser::addEntries()
379 {
380  append(db, view::transform(dumps, [&](auto& d) {
381  return RomDatabase::Entry{
382  d.hash,
383  RomInfo(title, year, company, country, d.origValue,
384  d.origData, d.remark, d.type, genMSXid)};
385  }));
386 }
387 
388 // called on </softwaredb>
389 void DBParser::addAllEntries()
390 {
391  // Calculate boundary between old and new entries.
392  // old: [first, mid) already sorted, no duplicates
393  // new: [mid, last) not yet sorted, may have duplicates
394  // there may also be duplicates between old and new
395  const auto first = begin(db);
396  const auto last = end (db);
397  const auto mid = first + initialSize;
398  if (mid == last) return; // no new entries
399 
400  // Sort new entries, old entries are already sorted.
401  ranges::sort(mid, last, {}, &RomDatabase::Entry::sha1);
402 
403  // Filter duplicates from new entries. This is similar to the
404  // unique() algorithm, except that it also warns about duplicates.
405  auto it1 = mid;
406  auto it2 = mid + 1;
407  // skip initial non-duplicates
408  while (it2 != last) {
409  if (it1->sha1 == it2->sha1) break;
410  ++it1; ++it2;
411  }
412  // move non-duplicates up
413  while (it2 != last) {
414  if (it1->sha1 == it2->sha1) {
415  cliComm.printWarning(
416  "duplicate softwaredb entry SHA1: ",
417  it2->sha1.toString());
418  } else {
419  ++it1;
420  *it1 = std::move(*it2);
421  }
422  ++it2;
423  }
424  // actually erase the duplicates (typically none)
425  db.erase(it1 + 1, last);
426  // At this point both old and new entries are sorted and unique. But
427  // there may still be duplicates between old and new.
428 
429  // Merge new and old entries. This is similar to the inplace_merge()
430  // algorithm, except that duplicates (between old and new) are removed.
431  if (first == mid) return; // no old entries (common case)
432  RomDatabase::RomDB result;
433  result.reserve(db.size());
434  it1 = first;
435  it2 = mid;
436  // while both new and old still have elements
437  while (it1 != mid && it2 != last) {
438  if (it1->sha1 < it2->sha1) {
439  result.push_back(std::move(*it1));
440  ++it1;
441  } else {
442  if (it1->sha1 != it2->sha1) { // *it2 < *it1
443  result.push_back(std::move(*it2));
444  ++it2;
445  } else {
446  // pick old entry, silently ignore new
447  result.push_back(std::move(*it1));
448  ++it1; ++it2;
449  }
450  }
451  }
452  // move remaining old or new entries (one of these is empty)
453  move(it1, mid, back_inserter(result));
454  move(it2, last, back_inserter(result));
455 
456  // make result the new current database
457  swap(result, db);
458 }
459 
// Locate the digits of a start address written as "0x" followed by exactly
// 4 characters. Returns a pointer to those 4 characters (inside 's'), or
// nullptr when 's' does not have that form. The 4 characters themselves are
// not validated.
static const char* parseStart(string_view s)
{
	// we expect "0x0000", "0x4000", "0x8000", "0xc000" or ""
	if (s.size() != 6) return nullptr;
	if (s.substr(0, 2) != "0x") return nullptr;
	return s.data() + 2;
}
465 
// Body of the closing-tag handler (declared as 'void stop()' in DBParser).
//
// While inside an unknown element only the nesting depth is decreased.
// Otherwise the state machine returns to the state of the enclosing
// element, and the data collected for the closed element is finalized:
//  - </softwaredb>: merge all new entries into the database
//  - </software>:   add one entry per collected dump
//  - </dump>:       drop the dump when it has no hash
//  - </rom> and </megarom>: resolve the mapper type of the dump
// Throws MSXException on a closing tag before the root tag was opened or
// after it was closed.
467 {
468  if (unknownLevel) {
469  --unknownLevel;
470  return;
471  }
472 
473  switch (state) {
474  case SOFTWAREDB:
475  addAllEntries();
476  state = END;
477  break;
478  case SOFTWARE:
479  addEntries();
480  state = SOFTWAREDB;
481  break;
482  case SYSTEM:
483  case TITLE:
484  case COMPANY:
485  case YEAR:
486  case COUNTRY:
487  case GENMSXID:
488  state = SOFTWARE;
489  break;
490  case DUMP:
491  if (dumps.back().hash.empty()) {
492  // no sha1 sum specified, drop this dump
493  dumps.pop_back();
494  }
495  state = SOFTWARE;
496  break;
497  case ORIGINAL:
498  state = DUMP;
499  break;
500  case ROM: {
501  string_view t = type;
502  char buf[12]; // large enough for "Mirrored" (8) + 4 start-address characters
503  if (small_compare<'M','i','r','r','o','r','e','d'>(t)) {
504  if (const char* s = parseStart(startVal)) {
505  // build "Mirrored" + the 4 characters following "0x" in <start>
505  memcpy(buf, t.data(), 8);
506  memcpy(buf + 8, s, 4);
507  t = string_view(buf, 12);
508  }
509  } else if (small_compare<'N','o','r','m','a','l'>(t)) {
510  if (const char* s = parseStart(startVal)) {
511  // build "Normal" + the 4 characters following "0x" in <start>
511  memcpy(buf, t.data(), 6);
512  memcpy(buf + 6, s, 4);
513  t = string_view(buf, 10);
514  }
515  }
516  RomType romType = RomInfo::nameToRomType(t);
517  if (romType == ROM_UNKNOWN) {
518  unknownTypes[std::string(t)]++; // count per unknown type name
519  }
520  dumps.back().type = romType;
521  state = DUMP;
522  break;
523  }
524  case TYPE:
525  case START:
526  case HASH:
527  case DUMP_REMARK:
528  state = ROM;
529  break;
530  case DUMP_TEXT:
531  state = DUMP_REMARK;
532  break;
533  case BEGIN:
534  case END:
535  throw MSXException("Unexpected closing tag");
536 
537  default:
538  UNREACHABLE;
539  }
540 }
541 
542 void DBParser::doctype(string_view txt)
543 {
544  auto pos1 = txt.find(" SYSTEM \"");
545  if (pos1 == string_view::npos) return;
546  auto t = txt.substr(pos1 + 9);
547  auto pos2 = t.find('"');
548  if (pos2 == string_view::npos) return;
549  systemID = t.substr(0, pos2);
550 }
551 
552 static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
553  RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
554 {
555  DBParser handler(db, unknownTypes, cliComm, bufStart);
556  rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
557 
558  if (handler.getSystemID() != "softwaredb1.dtd") {
559  throw rapidsax::ParseError(
560  "Missing or wrong systemID.\n"
561  "You're probably using an old incompatible file format.",
562  nullptr);
563  }
564 }
565 
// Body of the RomDatabase constructor: loads every 'softwaredb.xml' found in
// the paths of systemFileContext() into 'db'.
//
// All files are read into one shared 'buffer'; each file gets a region of
// its size plus rapidsax::EXTRA_BUFFER_SPACE. Problems with individual files
// only produce warnings (or are ignored); a warning is also printed when
// no entry could be loaded at all, and when unknown mapper types were seen.
567 {
568  db.reserve(3500); // NOTE(review): presumably the approximate number of entries -- confirm
569  UnknownTypes unknownTypes;
570  // first user- then system-directory
571  std::vector<File> files;
572  size_t bufferSize = 0;
573  for (const auto& p : systemFileContext().getPaths()) {
574  try {
575  auto& f = files.emplace_back(p + "/softwaredb.xml");
576  bufferSize += f.getSize() + rapidsax::EXTRA_BUFFER_SPACE;
577  } catch (MSXException& /*e*/) {
578  // Ignore. It's not unusual the DB in the user
579  // directory is not found. In case there's an error
580  // with both user and system DB, we must give a
581  // warning, but that's done below.
582  }
583  }
584  buffer.resize(bufferSize); // one allocation that holds the text of all files
585  size_t bufferOffset = 0;
586  for (auto& file : files) {
587  try {
588  auto size = file.getSize();
589  auto* buf = &buffer[bufferOffset];
590  bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE; // advance before anything below can throw
591  file.read(buf, size);
592  buf[size] = 0; // zero-terminate the file content
593 
594  parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
595  } catch (rapidsax::ParseError& e) {
596  cliComm.printWarning(
597  "Rom database parsing failed: ", e.what());
598  } catch (MSXException& /*e*/) {
599  // Ignore, see above
600  }
601  }
602  // NOTE(review): presumably makes offset 0 of the buffer an empty string,
603  // for the String32 values the parser resets to the buffer start -- confirm.
602  if (bufferSize) buffer[0] = 0;
603  if (db.empty()) {
604  cliComm.printWarning(
605  "Couldn't load software database.\n"
606  "This may cause incorrect ROM mapper types to be used.");
607  }
608  if (!unknownTypes.empty()) {
609  std::string output = "Unknown mapper types in software database: ";
610  for (const auto& [type, count] : unknownTypes) {
611  strAppend(output, type, " (", count, "x); ");
612  }
613  cliComm.printWarning(output);
614  }
615 }
616 
617 const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
618 {
619  auto it = ranges::lower_bound(db, sha1sum, {}, &Entry::sha1);
620  return ((it != end(db)) && (it->sha1 == sha1sum))
621  ? &it->romInfo : nullptr;
622 }
623 
624 } // namespace openmsx
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
int g
constexpr void toString32(const char *buffer, const char *str, uint32_t &result)
Definition: String32.hh:25
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition: String32.hh:22
constexpr const char * fromString32(const char *buffer, uint32_t str32)
Definition: String32.hh:35
TclObject t
bool empty() const
Definition: hash_set.hh:532
void printWarning(std::string_view message)
Definition: CliComm.cc:10
void doctype(string_view txt)
Definition: RomDatabase.cc:542
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
Definition: RomDatabase.cc:26
void attribute(string_view name, string_view value)
Definition: RomDatabase.cc:269
void start(string_view tag)
Definition: RomDatabase.cc:103
string_view getSystemID() const
Definition: RomDatabase.cc:45
void text(string_view txt)
Definition: RomDatabase.cc:302
const std::string & getMessage() const &
Definition: MSXException.hh:23
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:111
RomDatabase(CliComm &cliComm)
Definition: RomDatabase.cc:566
std::vector< Entry > RomDB
Definition: RomDatabase.hh:20
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
Definition: RomDatabase.cc:617
static RomType nameToRomType(std::string_view name)
Definition: RomInfo.cc:174
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:22
const char * what() const
Definition: rapidsax.hh:113
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
Definition: lz4.cc:207
bool stringToBool(string_view str)
Definition: StringOp.cc:12
bool startsWith(string_view total, string_view part)
Definition: StringOp.cc:29
This file implemented 3 utility functions:
Definition: Autofire.cc:9
const FileContext & systemFileContext()
Definition: FileContext.cc:157
hash_map< std::string, unsigned, XXHasher > UnknownTypes
Definition: RomDatabase.cc:21
@ ROM_UNKNOWN
Definition: RomTypes.hh:90
void sort(RandomAccessRange &&range)
Definition: ranges.hh:34
auto lower_bound(ForwardRange &&range, const T &value, Compare comp={}, Proj proj={})
Definition: ranges.hh:85
constexpr size_t EXTRA_BUFFER_SPACE
Definition: rapidsax.hh:41
size_t size(std::string_view utf8)
constexpr auto transform(Range &&range, UnaryOp op)
Definition: view.hh:392
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:669
#define UNREACHABLE
Definition: unreachable.hh:38
constexpr auto begin(const zstring_view &x)
Definition: zstring_view.hh:83
constexpr auto end(const zstring_view &x)
Definition: zstring_view.hh:84