openMSX
RomDatabase.cc
Go to the documentation of this file.
1 #include "RomDatabase.hh"
2 #include "RomInfo.hh"
3 #include "FileContext.hh"
4 #include "File.hh"
5 #include "FileOperations.hh"
6 #include "CliComm.hh"
7 #include "MSXException.hh"
8 #include "StringOp.hh"
9 #include "String32.hh"
10 #include "hash_map.hh"
11 #include "ranges.hh"
12 #include "rapidsax.hh"
13 #include "unreachable.hh"
14 #include "stl.hh"
15 #include "view.hh"
16 #include "xxhash.hh"
17 #include <cassert>
18 #include <stdexcept>
19 
20 using std::string;
21 using std::string_view;
22 using std::vector;
23 
24 namespace openmsx {
25 
27 
29 {
30 public:
32  CliComm& cliComm_, char* bufStart_)
33  : db(db_)
34  , unknownTypes(unknownTypes_)
35  , cliComm(cliComm_)
36  , bufStart(bufStart_)
37  , state(BEGIN)
38  , unknownLevel(0)
39  , initialSize(db.size())
40  {
41  }
42 
43  // rapidsax handler interface
44  void start(string_view tag);
45  void attribute(string_view name, string_view value);
46  void text(string_view txt);
47  void stop();
48  void doctype(string_view txt);
49 
50  [[nodiscard]] string_view getSystemID() const { return systemID; }
51 
52 private:
53  [[nodiscard]] String32 cIndex(string_view str);
54  void addEntries();
55  void addAllEntries();
56 
57  enum State {
58  BEGIN,
59  SOFTWAREDB,
60  SOFTWARE,
61  SYSTEM,
62  TITLE,
63  COMPANY,
64  YEAR,
65  COUNTRY,
66  GENMSXID,
67  DUMP_REMARK,
68  DUMP_TEXT,
69  DUMP,
70  ORIGINAL,
71  ROM,
72  TYPE,
73  START,
74  HASH,
75  END
76  };
77 
78  struct Dump {
79  String32 remark;
80  Sha1Sum hash;
81  String32 origData;
82  RomType type;
83  bool origValue;
84  };
85 
87  UnknownTypes& unknownTypes;
88  CliComm& cliComm;
89  char* bufStart;
90 
91  string_view systemID;
92  string_view type;
93  string_view startVal;
94 
95  vector<Dump> dumps;
96  string_view system;
97  String32 title;
98  String32 company;
99  String32 year;
100  String32 country;
101  int genMSXid;
102 
103  State state;
104  unsigned unknownLevel;
105  size_t initialSize;
106 };
107 
108 void DBParser::start(string_view tag)
109 {
110  if (unknownLevel) {
111  ++unknownLevel;
112  return;
113  }
114 
115  assert(!tag.empty()); // rapidsax will reject empty tags
116  switch (state) {
117  case BEGIN:
118  if (tag == "softwaredb") {
119  state = SOFTWAREDB;
120  return;
121  }
122  throw MSXException("Expected <softwaredb> as root tag.");
123  case SOFTWAREDB:
124  if (small_compare<'s','o','f','t','w','a','r','e'>(tag)) {
125  system = string_view();
126  toString32(bufStart, bufStart, title);
127  toString32(bufStart, bufStart, company);
128  toString32(bufStart, bufStart, year);
129  toString32(bufStart, bufStart, country);
130  genMSXid = 0;
131  dumps.clear();
132  state = SOFTWARE;
133  return;
134  }
135  break;
136  case SOFTWARE: {
137  switch (tag.front()) {
138  case 's':
139  if (small_compare<'s','y','s','t','e','m'>(tag)) {
140  state = SYSTEM;
141  return;
142  }
143  break;
144  case 't':
145  tag.remove_prefix(1);
146  if (small_compare<'i','t','l','e'>(tag)) {
147  state = TITLE;
148  return;
149  }
150  break;
151  case 'c':
152  if (small_compare<'c','o','m','p','a','n','y'>(tag)) {
153  state = COMPANY;
154  return;
155  } else if (small_compare<'c','o','u','n','t','r','y'>(tag)) {
156  state = COUNTRY;
157  return;
158  }
159  break;
160  case 'y':
161  if (small_compare<'y','e','a','r'>(tag)) {
162  state = YEAR;
163  return;
164  }
165  break;
166  case 'g':
167  if (small_compare<'g','e','n','m','s','x','i','d'>(tag)) {
168  state = GENMSXID;
169  return;
170  }
171  break;
172  case 'd':
173  if (small_compare<'d','u','m','p'>(tag)) {
174  dumps.resize(dumps.size() + 1);
175  dumps.back().type = ROM_UNKNOWN;
176  dumps.back().origValue = false;
177  toString32(bufStart, bufStart, dumps.back().remark);
178  toString32(bufStart, bufStart, dumps.back().origData);
179  state = DUMP;
180  return;
181  }
182  break;
183  }
184  break;
185  }
186  case DUMP: {
187  switch (tag.front()) {
188  case 'o':
189  if (small_compare<'o','r','i','g','i','n','a','l'>(tag)) {
190  dumps.back().origValue = false;
191  state = ORIGINAL;
192  return;
193  }
194  break;
195  case 'm':
196  if (small_compare<'m','e','g','a','r','o','m'>(tag)) {
197  type = string_view();
198  startVal = string_view();
199  state = ROM;
200  return;
201  }
202  break;
203  case 'r':
204  tag.remove_prefix(1);
205  if (small_compare<'o','m'>(tag)) {
206  type = "Mirrored";
207  startVal = string_view();
208  state = ROM;
209  return;
210  }
211  break;
212  }
213  break;
214  }
215  case ROM: {
216  switch (tag.front()) {
217  case 't':
218  if (small_compare<'t','y','p','e'>(tag)) {
219  state = TYPE;
220  return;
221  }
222  break;
223  case 's':
224  tag.remove_prefix(1);
225  if (small_compare<'t','a','r','t'>(tag)) {
226  state = START;
227  return;
228  }
229  break;
230  case 'r':
231  if (small_compare<'r','e','m','a','r','k'>(tag)) {
232  state = DUMP_REMARK;
233  return;
234  }
235  break;
236  case 'h':
237  if (small_compare<'h','a','s','h'>(tag)) {
238  state = HASH;
239  return;
240  }
241  break;
242  }
243  break;
244  }
245  case DUMP_REMARK:
246  if (small_compare<'t','e','x','t'>(tag)) {
247  state = DUMP_TEXT;
248  return;
249  }
250  break;
251  case SYSTEM:
252  case TITLE:
253  case COMPANY:
254  case YEAR:
255  case COUNTRY:
256  case GENMSXID:
257  case ORIGINAL:
258  case TYPE:
259  case START:
260  case HASH:
261  case DUMP_TEXT:
262  break;
263 
264  case END:
265  throw MSXException("Unexpected opening tag: ", tag);
266 
267  default:
268  UNREACHABLE;
269  }
270 
271  ++unknownLevel;
272 }
273 
274 void DBParser::attribute(string_view name, string_view value)
275 {
276  if (unknownLevel) return;
277 
278  switch (state) {
279  case ORIGINAL:
280  if (small_compare<'v','a','l','u','e'>(name)) {
281  dumps.back().origValue = StringOp::stringToBool(value);
282  }
283  break;
284  case HASH:
285  case BEGIN:
286  case SOFTWAREDB:
287  case SOFTWARE:
288  case SYSTEM:
289  case TITLE:
290  case COMPANY:
291  case YEAR:
292  case COUNTRY:
293  case GENMSXID:
294  case DUMP_REMARK:
295  case DUMP_TEXT:
296  case DUMP:
297  case ROM:
298  case TYPE:
299  case START:
300  case END:
301  break;
302  default:
303  UNREACHABLE;
304  }
305 }
306 
307 void DBParser::text(string_view txt)
308 {
309  if (unknownLevel) return;
310 
311  switch (state) {
312  case SYSTEM:
313  system = txt;
314  break;
315  case TITLE:
316  title = cIndex(txt);
317  break;
318  case COMPANY:
319  company = cIndex(txt);
320  break;
321  case YEAR:
322  year = cIndex(txt);
323  break;
324  case COUNTRY:
325  country = cIndex(txt);
326  break;
327  case GENMSXID: {
328  auto g = StringOp::stringToBase<10, unsigned>(txt);
329  if (!g) {
330  cliComm.printWarning(
331  "Ignoring bad Generation MSX id (genmsxid) "
332  "in entry with title '", fromString32(bufStart, title),
333  ": ", txt);
334  }
335  genMSXid = *g;
336  break;
337  }
338  case ORIGINAL:
339  dumps.back().origData = cIndex(txt);
340  break;
341  case TYPE:
342  type = txt;
343  break;
344  case START:
345  startVal = txt;
346  break;
347  case HASH:
348  try {
349  dumps.back().hash = Sha1Sum(txt);
350  } catch (MSXException& e) {
351  cliComm.printWarning(
352  "Ignoring bad dump for '", fromString32(bufStart, title),
353  "': ", e.getMessage());
354  }
355  break;
356  case DUMP_REMARK:
357  case DUMP_TEXT:
358  dumps.back().remark = cIndex(txt);
359  break;
360  case BEGIN:
361  case SOFTWAREDB:
362  case SOFTWARE:
363  case DUMP:
364  case ROM:
365  case END:
366  break;
367  default:
368  UNREACHABLE;
369  }
370 }
371 
372 String32 DBParser::cIndex(string_view str)
373 {
374  auto* begin = const_cast<char*>(str.data());
375  auto* end = begin + str.size();
376  *end = 0;
377  String32 result;
378  toString32(bufStart, begin, result);
379  return result;
380 }
381 
382 // called on </software>
383 void DBParser::addEntries()
384 {
385  append(db, view::transform(dumps, [&](auto& d) {
386  return std::pair(d.hash,
387  RomInfo(title, year, company, country, d.origValue,
388  d.origData, d.remark, d.type, genMSXid));
389  }));
390 }
391 
392 // called on </softwaredb>
393 void DBParser::addAllEntries()
394 {
395  // Calculate boundary between old and new entries.
396  // old: [first, mid) already sorted, no duplicates
397  // new: [mid, last) not yet sorted, may have duplicates
398  // there may also be duplicates between old and new
399  const auto first = begin(db);
400  const auto last = end (db);
401  const auto mid = first + initialSize;
402  if (mid == last) return; // no new entries
403 
404  // Sort new entries, old entries are already sorted.
405  std::sort(mid, last, LessTupleElement<0>());
406 
407  // Filter duplicates from new entries. This is similar to the
408  // unique() algorithm, except that it also warns about duplicates.
409  auto it1 = mid;
410  auto it2 = mid + 1;
411  // skip initial non-duplicates
412  while (it2 != last) {
413  if (it1->first == it2->first) break;
414  ++it1; ++it2;
415  }
416  // move non-duplicates up
417  while (it2 != last) {
418  if (it1->first == it2->first) {
419  cliComm.printWarning(
420  "duplicate softwaredb entry SHA1: ",
421  it2->first.toString());
422  } else {
423  ++it1;
424  *it1 = std::move(*it2);
425  }
426  ++it2;
427  }
428  // actually erase the duplicates (typically none)
429  db.erase(it1 + 1, last);
430  // At this point both old and new entries are sorted and unique. But
431  // there may still be duplicates between old and new.
432 
433  // Merge new and old entries. This is similar to the inplace_merge()
434  // algorithm, except that duplicates (between old and new) are removed.
435  if (first == mid) return; // no old entries (common case)
436  RomDatabase::RomDB result;
437  result.reserve(db.size());
438  it1 = first;
439  it2 = mid;
440  // while both new and old still have elements
441  while (it1 != mid && it2 != last) {
442  if (it1->first < it2->first) {
443  result.push_back(std::move(*it1));
444  ++it1;
445  } else {
446  if (it1->first != it2->first) { // *it2 < *it1
447  result.push_back(std::move(*it2));
448  ++it2;
449  } else {
450  // pick old entry, silently ignore new
451  result.push_back(std::move(*it1));
452  ++it1; ++it2;
453  }
454  }
455  }
456  // move remaining old or new entries (one of these is empty)
457  move(it1, mid, back_inserter(result));
458  move(it2, last, back_inserter(result));
459 
460  // make result the new current database
461  swap(result, db);
462 }
463 
// Validates a start-address string of the form "0xNNNN".
// Returns a pointer to the four characters behind the "0x" prefix, or
// nullptr when 's' is not exactly six characters long or lacks the prefix.
static const char* parseStart(string_view s)
{
	// we expect "0x0000", "0x4000", "0x8000", "0xc000" or ""
	if (s.size() != 6) {
		return nullptr;
	}
	const bool hasHexPrefix = (s[0] == '0') && (s[1] == 'x');
	if (!hasHexPrefix) {
		return nullptr;
	}
	return s.data() + 2;
}
469 
471 {
472  if (unknownLevel) {
473  --unknownLevel;
474  return;
475  }
476 
477  switch (state) {
478  case SOFTWAREDB:
479  addAllEntries();
480  state = END;
481  break;
482  case SOFTWARE:
483  addEntries();
484  state = SOFTWAREDB;
485  break;
486  case SYSTEM:
487  case TITLE:
488  case COMPANY:
489  case YEAR:
490  case COUNTRY:
491  case GENMSXID:
492  state = SOFTWARE;
493  break;
494  case DUMP:
495  if (dumps.back().hash.empty()) {
496  // no sha1 sum specified, drop this dump
497  dumps.pop_back();
498  }
499  state = SOFTWARE;
500  break;
501  case ORIGINAL:
502  state = DUMP;
503  break;
504  case ROM: {
505  string_view t = type;
506  char buf[12];
507  if (small_compare<'M','i','r','r','o','r','e','d'>(t)) {
508  if (const char* s = parseStart(startVal)) {
509  memcpy(buf, t.data(), 8);
510  memcpy(buf + 8, s, 4);
511  t = string_view(buf, 12);
512  }
513  } else if (small_compare<'N','o','r','m','a','l'>(t)) {
514  if (const char* s = parseStart(startVal)) {
515  memcpy(buf, t.data(), 6);
516  memcpy(buf + 6, s, 4);
517  t = string_view(buf, 10);
518  }
519  }
520  RomType romType = RomInfo::nameToRomType(t);
521  if (romType == ROM_UNKNOWN) {
522  unknownTypes[string(t)]++;
523  }
524  dumps.back().type = romType;
525  state = DUMP;
526  break;
527  }
528  case TYPE:
529  case START:
530  case HASH:
531  case DUMP_REMARK:
532  state = ROM;
533  break;
534  case DUMP_TEXT:
535  state = DUMP_REMARK;
536  break;
537  case BEGIN:
538  case END:
539  throw MSXException("Unexpected closing tag");
540 
541  default:
542  UNREACHABLE;
543  }
544 }
545 
546 void DBParser::doctype(string_view txt)
547 {
548  auto pos1 = txt.find(" SYSTEM \"");
549  if (pos1 == string_view::npos) return;
550  auto t = txt.substr(pos1 + 9);
551  auto pos2 = t.find('"');
552  if (pos2 == string_view::npos) return;
553  systemID = t.substr(0, pos2);
554 }
555 
556 static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
557  RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
558 {
559  DBParser handler(db, unknownTypes, cliComm, bufStart);
560  rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
561 
562  if (handler.getSystemID() != "softwaredb1.dtd") {
563  throw rapidsax::ParseError(
564  "Missing or wrong systemID.\n"
565  "You're probably using an old incompatible file format.",
566  nullptr);
567  }
568 }
569 
571 {
572  db.reserve(3500);
573  UnknownTypes unknownTypes;
574  // first user- then system-directory
575  vector<string> paths = systemFileContext().getPaths();
576  vector<File> files;
577  size_t bufferSize = 0;
578  for (auto& p : paths) {
579  try {
580  auto& f = files.emplace_back(p + "/softwaredb.xml");
581  bufferSize += f.getSize() + rapidsax::EXTRA_BUFFER_SPACE;
582  } catch (MSXException& /*e*/) {
583  // Ignore. It's not unusual the DB in the user
584  // directory is not found. In case there's an error
585  // with both user and system DB, we must give a
586  // warning, but that's done below.
587  }
588  }
589  buffer.resize(bufferSize);
590  size_t bufferOffset = 0;
591  for (auto& file : files) {
592  try {
593  auto size = file.getSize();
594  auto* buf = &buffer[bufferOffset];
595  bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE;
596  file.read(buf, size);
597  buf[size] = 0;
598 
599  parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
600  } catch (rapidsax::ParseError& e) {
601  cliComm.printWarning(
602  "Rom database parsing failed: ", e.what());
603  } catch (MSXException& /*e*/) {
604  // Ignore, see above
605  }
606  }
607  if (bufferSize) buffer[0] = 0;
608  if (db.empty()) {
609  cliComm.printWarning(
610  "Couldn't load software database.\n"
611  "This may cause incorrect ROM mapper types to be used.");
612  }
613  if (!unknownTypes.empty()) {
614  string output = "Unknown mapper types in software database: ";
615  for (const auto& [type, count] : unknownTypes) {
616  strAppend(output, type, " (", count, "x); ");
617  }
618  cliComm.printWarning(output);
619  }
620 }
621 
622 const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
623 {
624  auto it = ranges::lower_bound(db, sha1sum, LessTupleElement<0>());
625  return ((it != end(db)) && (it->first == sha1sum))
626  ? &it->second : nullptr;
627 }
628 
629 } // namespace openmsx
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
int g
constexpr void toString32(const char *buffer, const char *str, uint32_t &result)
Definition: String32.hh:25
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition: String32.hh:22
constexpr const char * fromString32(const char *buffer, uint32_t str32)
Definition: String32.hh:35
TclObject t
bool empty() const
Definition: hash_set.hh:533
void printWarning(std::string_view message)
Definition: CliComm.cc:10
void doctype(string_view txt)
Definition: RomDatabase.cc:546
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
Definition: RomDatabase.cc:31
void attribute(string_view name, string_view value)
Definition: RomDatabase.cc:274
void start(string_view tag)
Definition: RomDatabase.cc:108
string_view getSystemID() const
Definition: RomDatabase.cc:50
void text(string_view txt)
Definition: RomDatabase.cc:307
const std::vector< std::string > & getPaths() const
Definition: FileContext.cc:109
const std::string & getMessage() const &
Definition: MSXException.hh:23
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:111
std::vector< std::pair< Sha1Sum, RomInfo > > RomDB
Definition: RomDatabase.hh:17
RomDatabase(CliComm &cliComm)
Definition: RomDatabase.cc:570
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
Definition: RomDatabase.cc:622
static RomType nameToRomType(std::string_view name)
Definition: RomInfo.cc:179
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:22
const char * what() const
Definition: rapidsax.hh:111
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
Definition: lz4.cc:207
bool stringToBool(string_view str)
Definition: StringOp.cc:16
bool startsWith(string_view total, string_view part)
Definition: StringOp.cc:33
constexpr void sort(RAIt first, RAIt last, Compare cmp=Compare{})
Definition: cstd.hh:64
This file implemented 3 utility functions:
Definition: Autofire.cc:9
const FileContext & systemFileContext()
Definition: FileContext.cc:156
hash_map< string, unsigned, XXHasher > UnknownTypes
Definition: RomDatabase.cc:26
@ ROM_UNKNOWN
Definition: RomTypes.hh:90
auto lower_bound(ForwardRange &&range, const T &value)
Definition: ranges.hh:71
constexpr size_t EXTRA_BUFFER_SPACE
Definition: rapidsax.hh:41
size_t size(std::string_view utf8)
constexpr auto transform(Range &&range, UnaryOp op)
Definition: view.hh:306
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:669
#define UNREACHABLE
Definition: unreachable.hh:38
constexpr auto begin(const zstring_view &x)
Definition: zstring_view.hh:82
constexpr auto end(const zstring_view &x)
Definition: zstring_view.hh:83