openMSX
RomDatabase.cc
Go to the documentation of this file.
1#include "RomDatabase.hh"
2#include "FileContext.hh"
3#include "File.hh"
4#include "CliComm.hh"
5#include "MSXException.hh"
6#include "StringOp.hh"
7#include "String32.hh"
8#include "hash_map.hh"
9#include "narrow.hh"
10#include "ranges.hh"
11#include "rapidsax.hh"
12#include "unreachable.hh"
13#include "stl.hh"
14#include "view.hh"
15#include "xxhash.hh"
16#include <array>
17#include <cassert>
18#include <string_view>
19
20using std::string_view;
21
22namespace openmsx {
23
25
27{
28public:
30 CliComm& cliComm_, char* bufStart_)
31 : db(db_)
32 , unknownTypes(unknownTypes_)
33 , cliComm(cliComm_)
34 , bufStart(bufStart_)
35 , initialSize(db.size())
36 {
37 }
38
39 // rapidsax handler interface
40 void start(string_view tag);
41 void attribute(string_view name, string_view value);
42 void text(string_view txt);
43 void stop();
44 void doctype(string_view txt);
45
46 [[nodiscard]] string_view getSystemID() const { return systemID; }
47
48private:
49 [[nodiscard]] String32 cIndex(string_view str);
50 void addEntries();
51 void addAllEntries();
52
53 enum State {
54 BEGIN,
55 SOFTWAREDB,
56 SOFTWARE,
57 SYSTEM,
58 TITLE,
59 COMPANY,
60 YEAR,
61 COUNTRY,
62 GENMSXID,
63 DUMP_REMARK,
64 DUMP_TEXT,
65 DUMP,
66 ORIGINAL,
67 ROM,
68 TYPE,
69 START,
70 HASH,
71 END
72 };
73
74 struct Dump {
75 String32 remark;
76 Sha1Sum hash;
77 String32 origData;
78 RomType type;
79 bool origValue;
80 };
81
83 UnknownTypes& unknownTypes;
84 CliComm& cliComm;
85 char* bufStart;
86
87 string_view systemID;
88 string_view type;
89 string_view startVal;
90
91 std::vector<Dump> dumps;
92 string_view system;
93 String32 title;
94 String32 company;
95 String32 year;
96 String32 country;
97 unsigned genMSXid;
98
99 State state = BEGIN;
100 unsigned unknownLevel = 0;
101 size_t initialSize;
102};
103
104void DBParser::start(string_view tag)
105{
106 if (unknownLevel) {
107 ++unknownLevel;
108 return;
109 }
110
111 assert(!tag.empty()); // rapidsax will reject empty tags
112 switch (state) {
113 case BEGIN:
114 if (tag == "softwaredb") {
115 state = SOFTWAREDB;
116 return;
117 }
118 throw MSXException("Expected <softwaredb> as root tag.");
119 case SOFTWAREDB:
120 if (small_compare<"software">(tag)) {
121 system = string_view();
122 toString32(bufStart, bufStart, title);
123 toString32(bufStart, bufStart, company);
124 toString32(bufStart, bufStart, year);
125 toString32(bufStart, bufStart, country);
126 genMSXid = 0;
127 dumps.clear();
128 state = SOFTWARE;
129 return;
130 }
131 break;
132 case SOFTWARE: {
133 switch (tag.front()) {
134 case 's':
135 if (small_compare<"system">(tag)) {
136 state = SYSTEM;
137 return;
138 }
139 break;
140 case 't':
141 tag.remove_prefix(1);
142 if (small_compare<"itle">(tag)) {
143 state = TITLE;
144 return;
145 }
146 break;
147 case 'c':
148 if (small_compare<"company">(tag)) {
149 state = COMPANY;
150 return;
151 } else if (small_compare<"country">(tag)) {
152 state = COUNTRY;
153 return;
154 }
155 break;
156 case 'y':
157 if (small_compare<"year">(tag)) {
158 state = YEAR;
159 return;
160 }
161 break;
162 case 'g':
163 if (small_compare<"genmsxid">(tag)) {
164 state = GENMSXID;
165 return;
166 }
167 break;
168 case 'd':
169 if (small_compare<"dump">(tag)) {
170 dumps.resize(dumps.size() + 1);
171 dumps.back().type = ROM_UNKNOWN;
172 dumps.back().origValue = false;
173 toString32(bufStart, bufStart, dumps.back().remark);
174 toString32(bufStart, bufStart, dumps.back().origData);
175 state = DUMP;
176 return;
177 }
178 break;
179 }
180 break;
181 }
182 case DUMP: {
183 switch (tag.front()) {
184 case 'o':
185 if (small_compare<"original">(tag)) {
186 dumps.back().origValue = false;
187 state = ORIGINAL;
188 return;
189 }
190 break;
191 case 'm':
192 if (small_compare<"megarom">(tag)) {
193 type = string_view();
194 startVal = string_view();
195 state = ROM;
196 return;
197 }
198 break;
199 case 'r':
200 tag.remove_prefix(1);
201 if (small_compare<"om">(tag)) {
202 type = "Mirrored";
203 startVal = string_view();
204 state = ROM;
205 return;
206 }
207 break;
208 }
209 break;
210 }
211 case ROM: {
212 switch (tag.front()) {
213 case 't':
214 if (small_compare<"type">(tag)) {
215 state = TYPE;
216 return;
217 }
218 break;
219 case 's':
220 tag.remove_prefix(1);
221 if (small_compare<"tart">(tag)) {
222 state = START;
223 return;
224 }
225 break;
226 case 'r':
227 if (small_compare<"remark">(tag)) {
228 state = DUMP_REMARK;
229 return;
230 }
231 break;
232 case 'h':
233 if (small_compare<"hash">(tag)) {
234 state = HASH;
235 return;
236 }
237 break;
238 }
239 break;
240 }
241 case DUMP_REMARK:
242 if (small_compare<"text">(tag)) {
243 state = DUMP_TEXT;
244 return;
245 }
246 break;
247 case SYSTEM:
248 case TITLE:
249 case COMPANY:
250 case YEAR:
251 case COUNTRY:
252 case GENMSXID:
253 case ORIGINAL:
254 case TYPE:
255 case START:
256 case HASH:
257 case DUMP_TEXT:
258 break;
259
260 case END:
261 throw MSXException("Unexpected opening tag: ", tag);
262
263 default:
265 }
266
267 ++unknownLevel;
268}
269
270void DBParser::attribute(string_view name, string_view value)
271{
272 if (unknownLevel) return;
273
274 switch (state) {
275 case ORIGINAL:
276 if (small_compare<"value">(name)) {
277 dumps.back().origValue = StringOp::stringToBool(value);
278 }
279 break;
280 case HASH:
281 case BEGIN:
282 case SOFTWAREDB:
283 case SOFTWARE:
284 case SYSTEM:
285 case TITLE:
286 case COMPANY:
287 case YEAR:
288 case COUNTRY:
289 case GENMSXID:
290 case DUMP_REMARK:
291 case DUMP_TEXT:
292 case DUMP:
293 case ROM:
294 case TYPE:
295 case START:
296 case END:
297 break;
298 default:
300 }
301}
302
303void DBParser::text(string_view txt)
304{
305 if (unknownLevel) return;
306
307 switch (state) {
308 case SYSTEM:
309 system = txt;
310 break;
311 case TITLE:
312 title = cIndex(txt);
313 break;
314 case COMPANY:
315 company = cIndex(txt);
316 break;
317 case YEAR:
318 year = cIndex(txt);
319 break;
320 case COUNTRY:
321 country = cIndex(txt);
322 break;
323 case GENMSXID: {
324 if (auto g = StringOp::stringToBase<10, unsigned>(txt)) {
325 genMSXid = *g;
326 } else {
327 cliComm.printWarning(
328 "Ignoring bad Generation MSX id (genmsxid) "
329 "in entry with title '", fromString32(bufStart, title),
330 ": ", txt);
331 }
332 break;
333 }
334 case ORIGINAL:
335 dumps.back().origData = cIndex(txt);
336 break;
337 case TYPE:
338 type = txt;
339 break;
340 case START:
341 startVal = txt;
342 break;
343 case HASH:
344 try {
345 dumps.back().hash = Sha1Sum(txt);
346 } catch (MSXException& e) {
347 cliComm.printWarning(
348 "Ignoring bad dump for '", fromString32(bufStart, title),
349 "': ", e.getMessage());
350 }
351 break;
352 case DUMP_REMARK:
353 case DUMP_TEXT:
354 dumps.back().remark = cIndex(txt);
355 break;
356 case BEGIN:
357 case SOFTWAREDB:
358 case SOFTWARE:
359 case DUMP:
360 case ROM:
361 case END:
362 break;
363 default:
365 }
366}
367
368String32 DBParser::cIndex(string_view str)
369{
370 auto* begin = const_cast<char*>(str.data());
371 auto* end = begin + str.size();
372 *end = 0;
373 String32 result;
374 toString32(bufStart, begin, result);
375 return result;
376}
377
378// called on </software>
379void DBParser::addEntries()
380{
381 append(db, view::transform(dumps, [&](auto& d) {
382 return RomDatabase::Entry{
383 d.hash,
384 RomInfo(title, year, company, country, d.origValue,
385 d.origData, d.remark, d.type, genMSXid)};
386 }));
387}
388
389// called on </softwaredb>
390void DBParser::addAllEntries()
391{
392 // Calculate boundary between old and new entries.
393 // old: [first, mid) already sorted, no duplicates
394 // new: [mid, last) not yet sorted, may have duplicates
395 // there may also be duplicates between old and new
396 const auto first = begin(db);
397 const auto last = end (db);
398 const auto mid = first + narrow<ptrdiff_t>(initialSize);
399 if (mid == last) return; // no new entries
400
401 // Sort new entries, old entries are already sorted.
402 ranges::sort(mid, last, {}, &RomDatabase::Entry::sha1);
403
404 // Filter duplicates from new entries. This is similar to the
405 // unique() algorithm, except that it also warns about duplicates.
406 auto it1 = mid;
407 auto it2 = mid + 1;
408 // skip initial non-duplicates
409 while (it2 != last) {
410 if (it1->sha1 == it2->sha1) break;
411 ++it1; ++it2;
412 }
413 // move non-duplicates up
414 while (it2 != last) {
415 if (it1->sha1 == it2->sha1) {
416 cliComm.printWarning(
417 "duplicate softwaredb entry SHA1: ",
418 it2->sha1.toString());
419 } else {
420 ++it1;
421 *it1 = std::move(*it2);
422 }
423 ++it2;
424 }
425 // actually erase the duplicates (typically none)
426 db.erase(it1 + 1, last);
427 // At this point both old and new entries are sorted and unique. But
428 // there may still be duplicates between old and new.
429
430 // Merge new and old entries. This is similar to the inplace_merge()
431 // algorithm, except that duplicates (between old and new) are removed.
432 if (first == mid) return; // no old entries (common case)
433 RomDatabase::RomDB result;
434 result.reserve(db.size());
435 it1 = first;
436 it2 = mid;
437 // while both new and old still have elements
438 while (it1 != mid && it2 != last) {
439 if (it1->sha1 < it2->sha1) {
440 result.push_back(std::move(*it1));
441 ++it1;
442 } else {
443 if (it1->sha1 != it2->sha1) { // *it2 < *it1
444 result.push_back(std::move(*it2));
445 ++it2;
446 } else {
447 // pick old entry, silently ignore new
448 result.push_back(std::move(*it1));
449 ++it1; ++it2;
450 }
451 }
452 }
453 // move remaining old or new entries (one of these is empty)
454 move(it1, mid, back_inserter(result));
455 move(it2, last, back_inserter(result));
456
457 // make result the new current database
458 swap(result, db);
459}
460
/** Locate the hex digits of a start-address string.
  *
  * We expect "0x0000", "0x4000", "0x8000", "0xc000" or "". Only the overall
  * shape is checked: exactly six characters, the first two being "0x".
  *
  * @return Pointer to the four characters following "0x" (inside 's'),
  *         or nullptr when 's' does not have that shape.
  */
static const char* parseStart(std::string_view s)
{
    if (s.size() != 6) return nullptr;
    if (s.substr(0, 2) != "0x") return nullptr;
    return s.data() + 2;
}
466
468{
469 if (unknownLevel) {
470 --unknownLevel;
471 return;
472 }
473
474 switch (state) {
475 case SOFTWAREDB:
476 addAllEntries();
477 state = END;
478 break;
479 case SOFTWARE:
480 addEntries();
481 state = SOFTWAREDB;
482 break;
483 case SYSTEM:
484 case TITLE:
485 case COMPANY:
486 case YEAR:
487 case COUNTRY:
488 case GENMSXID:
489 state = SOFTWARE;
490 break;
491 case DUMP:
492 if (dumps.back().hash.empty()) {
493 // no sha1 sum specified, drop this dump
494 dumps.pop_back();
495 }
496 state = SOFTWARE;
497 break;
498 case ORIGINAL:
499 state = DUMP;
500 break;
501 case ROM: {
502 string_view t = type;
503 std::array<char, 8 + 4> buf;
504 if (small_compare<"Mirrored">(t)) {
505 if (const char* s = parseStart(startVal)) {
506 ranges::copy(t, subspan<8>(buf, 0));
507 ranges::copy(std::string_view(s, 4), subspan<4>(buf, 8));
508 t = string_view(buf.data(), 8 + 4);
509 }
510 } else if (small_compare<"Normal">(t)) {
511 if (const char* s = parseStart(startVal)) {
512 ranges::copy(t, subspan<6>(buf, 0));
513 ranges::copy(std::string_view(s, 4), subspan<4>(buf, 6));
514 t = string_view(buf.data(), 6 + 4);
515 }
516 }
518 if (romType == ROM_UNKNOWN) {
519 unknownTypes[std::string(t)]++;
520 }
521 dumps.back().type = romType;
522 state = DUMP;
523 break;
524 }
525 case TYPE:
526 case START:
527 case HASH:
528 case DUMP_REMARK:
529 state = ROM;
530 break;
531 case DUMP_TEXT:
532 state = DUMP_REMARK;
533 break;
534 case BEGIN:
535 case END:
536 throw MSXException("Unexpected closing tag");
537
538 default:
540 }
541}
542
543void DBParser::doctype(string_view txt)
544{
545 auto pos1 = txt.find(" SYSTEM \"");
546 if (pos1 == string_view::npos) return;
547 auto t = txt.substr(pos1 + 9);
548 auto pos2 = t.find('"');
549 if (pos2 == string_view::npos) return;
550 systemID = t.substr(0, pos2);
551}
552
553static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
554 RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
555{
556 DBParser handler(db, unknownTypes, cliComm, bufStart);
557 rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
558
559 if (handler.getSystemID() != "softwaredb1.dtd") {
561 "Missing or wrong systemID.\n"
562 "You're probably using an old incompatible file format.",
563 nullptr);
564 }
565}
566
568{
569 db.reserve(3500);
570 UnknownTypes unknownTypes;
571 // first user- then system-directory
572 std::vector<File> files;
573 size_t bufferSize = 0;
574 for (const auto& p : systemFileContext().getPaths()) {
575 try {
576 auto& f = files.emplace_back(p + "/softwaredb.xml");
577 bufferSize += f.getSize() + rapidsax::EXTRA_BUFFER_SPACE;
578 } catch (MSXException& /*e*/) {
579 // Ignore. It's not unusual the DB in the user
580 // directory is not found. In case there's an error
581 // with both user and system DB, we must give a
582 // warning, but that's done below.
583 }
584 }
585 buffer.resize(bufferSize);
586 size_t bufferOffset = 0;
587 for (auto& file : files) {
588 try {
589 auto size = file.getSize();
590 auto* buf = &buffer[bufferOffset];
591 bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE;
592 file.read(std::span{buf, size});
593 buf[size] = 0;
594
595 parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
596 } catch (rapidsax::ParseError& e) {
597 cliComm.printWarning(
598 "Rom database parsing failed: ", e.what());
599 } catch (MSXException& /*e*/) {
600 // Ignore, see above
601 }
602 }
603 if (bufferSize) buffer[0] = 0;
604 if (db.empty()) {
605 cliComm.printWarning(
606 "Couldn't load software database.\n"
607 "This may cause incorrect ROM mapper types to be used.");
608 }
609 if (!unknownTypes.empty()) {
610 std::string output = "Unknown mapper types in software database: ";
611 for (const auto& [type, count] : unknownTypes) {
612 strAppend(output, type, " (", count, "x); ");
613 }
614 cliComm.printWarning(output);
615 }
616}
617
618const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
619{
620 auto d = binary_find(db, sha1sum, {}, &Entry::sha1);
621 return d ? &d->romInfo : nullptr;
622}
623
624} // namespace openmsx
int g
constexpr const char * fromString32(const char *buffer, uint32_t str32)
Definition: String32.hh:34
constexpr void toString32(const char *buffer, const char *str, uint32_t &result)
Definition: String32.hh:25
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition: String32.hh:22
TclObject t
bool empty() const
Definition: hash_set.hh:526
void printWarning(std::string_view message)
Definition: CliComm.cc:10
void doctype(string_view txt)
Definition: RomDatabase.cc:543
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
Definition: RomDatabase.cc:29
void attribute(string_view name, string_view value)
Definition: RomDatabase.cc:270
void start(string_view tag)
Definition: RomDatabase.cc:104
string_view getSystemID() const
Definition: RomDatabase.cc:46
void text(string_view txt)
Definition: RomDatabase.cc:303
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:111
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
RomDatabase(CliComm &cliComm)
Definition: RomDatabase.cc:567
std::vector< Entry > RomDB
Definition: RomDatabase.hh:20
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
Definition: RomDatabase.cc:618
static RomType nameToRomType(std::string_view name)
Definition: RomInfo.cc:174
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:23
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
Definition: lz4.cc:147
constexpr double e
Definition: Math.hh:21
bool stringToBool(string_view str)
Definition: StringOp.cc:12
This file implemented 3 utility functions:
Definition: Autofire.cc:9
const FileContext & systemFileContext()
Definition: FileContext.cc:155
hash_map< std::string, unsigned, XXHasher > UnknownTypes
Definition: RomDatabase.cc:24
@ ROM_UNKNOWN
Definition: RomTypes.hh:90
auto copy(InputRange &&range, OutputIter out)
Definition: ranges.hh:232
constexpr void sort(RandomAccessRange &&range)
Definition: ranges.hh:49
constexpr size_t EXTRA_BUFFER_SPACE
Definition: rapidsax.hh:42
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
size_t size(std::string_view utf8)
constexpr auto transform(Range &&range, UnaryOp op)
Definition: view.hh:458
auto * binary_find(ForwardRange &&range, const T &value, Compare comp={}, Proj proj={})
Definition: ranges.hh:413
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:620
#define UNREACHABLE
Definition: unreachable.hh:38
constexpr auto begin(const zstring_view &x)
constexpr auto end(const zstring_view &x)