openMSX
RomDatabase.cc
Go to the documentation of this file.
1#include "RomDatabase.hh"
2#include "FileContext.hh"
3#include "File.hh"
4#include "CliComm.hh"
5#include "MSXException.hh"
6#include "StringOp.hh"
7#include "String32.hh"
8#include "hash_map.hh"
9#include "ranges.hh"
10#include "rapidsax.hh"
11#include "unreachable.hh"
12#include "stl.hh"
13#include "view.hh"
14#include "xxhash.hh"
15#include <cassert>
16
17using std::string_view;
18
19namespace openmsx {
20
22
24{
25public:
27 CliComm& cliComm_, char* bufStart_)
28 : db(db_)
29 , unknownTypes(unknownTypes_)
30 , cliComm(cliComm_)
31 , bufStart(bufStart_)
32 , state(BEGIN)
33 , unknownLevel(0)
34 , initialSize(db.size())
35 {
36 }
37
38 // rapidsax handler interface
39 void start(string_view tag);
40 void attribute(string_view name, string_view value);
41 void text(string_view txt);
42 void stop();
43 void doctype(string_view txt);
44
45 [[nodiscard]] string_view getSystemID() const { return systemID; }
46
47private:
48 [[nodiscard]] String32 cIndex(string_view str);
49 void addEntries();
50 void addAllEntries();
51
52 enum State {
53 BEGIN,
54 SOFTWAREDB,
55 SOFTWARE,
56 SYSTEM,
57 TITLE,
58 COMPANY,
59 YEAR,
60 COUNTRY,
61 GENMSXID,
62 DUMP_REMARK,
63 DUMP_TEXT,
64 DUMP,
65 ORIGINAL,
66 ROM,
67 TYPE,
68 START,
69 HASH,
70 END
71 };
72
73 struct Dump {
74 String32 remark;
75 Sha1Sum hash;
76 String32 origData;
77 RomType type;
78 bool origValue;
79 };
80
82 UnknownTypes& unknownTypes;
83 CliComm& cliComm;
84 char* bufStart;
85
86 string_view systemID;
87 string_view type;
88 string_view startVal;
89
90 std::vector<Dump> dumps;
91 string_view system;
92 String32 title;
93 String32 company;
94 String32 year;
95 String32 country;
96 int genMSXid;
97
98 State state;
99 unsigned unknownLevel;
100 size_t initialSize;
101};
102
103void DBParser::start(string_view tag)
104{
105 if (unknownLevel) {
106 ++unknownLevel;
107 return;
108 }
109
110 assert(!tag.empty()); // rapidsax will reject empty tags
111 switch (state) {
112 case BEGIN:
113 if (tag == "softwaredb") {
114 state = SOFTWAREDB;
115 return;
116 }
117 throw MSXException("Expected <softwaredb> as root tag.");
118 case SOFTWAREDB:
119 if (small_compare<"software">(tag)) {
120 system = string_view();
121 toString32(bufStart, bufStart, title);
122 toString32(bufStart, bufStart, company);
123 toString32(bufStart, bufStart, year);
124 toString32(bufStart, bufStart, country);
125 genMSXid = 0;
126 dumps.clear();
127 state = SOFTWARE;
128 return;
129 }
130 break;
131 case SOFTWARE: {
132 switch (tag.front()) {
133 case 's':
134 if (small_compare<"system">(tag)) {
135 state = SYSTEM;
136 return;
137 }
138 break;
139 case 't':
140 tag.remove_prefix(1);
141 if (small_compare<"itle">(tag)) {
142 state = TITLE;
143 return;
144 }
145 break;
146 case 'c':
147 if (small_compare<"company">(tag)) {
148 state = COMPANY;
149 return;
150 } else if (small_compare<"country">(tag)) {
151 state = COUNTRY;
152 return;
153 }
154 break;
155 case 'y':
156 if (small_compare<"year">(tag)) {
157 state = YEAR;
158 return;
159 }
160 break;
161 case 'g':
162 if (small_compare<"genmsxid">(tag)) {
163 state = GENMSXID;
164 return;
165 }
166 break;
167 case 'd':
168 if (small_compare<"dump">(tag)) {
169 dumps.resize(dumps.size() + 1);
170 dumps.back().type = ROM_UNKNOWN;
171 dumps.back().origValue = false;
172 toString32(bufStart, bufStart, dumps.back().remark);
173 toString32(bufStart, bufStart, dumps.back().origData);
174 state = DUMP;
175 return;
176 }
177 break;
178 }
179 break;
180 }
181 case DUMP: {
182 switch (tag.front()) {
183 case 'o':
184 if (small_compare<"original">(tag)) {
185 dumps.back().origValue = false;
186 state = ORIGINAL;
187 return;
188 }
189 break;
190 case 'm':
191 if (small_compare<"megarom">(tag)) {
192 type = string_view();
193 startVal = string_view();
194 state = ROM;
195 return;
196 }
197 break;
198 case 'r':
199 tag.remove_prefix(1);
200 if (small_compare<"om">(tag)) {
201 type = "Mirrored";
202 startVal = string_view();
203 state = ROM;
204 return;
205 }
206 break;
207 }
208 break;
209 }
210 case ROM: {
211 switch (tag.front()) {
212 case 't':
213 if (small_compare<"type">(tag)) {
214 state = TYPE;
215 return;
216 }
217 break;
218 case 's':
219 tag.remove_prefix(1);
220 if (small_compare<"tart">(tag)) {
221 state = START;
222 return;
223 }
224 break;
225 case 'r':
226 if (small_compare<"remark">(tag)) {
227 state = DUMP_REMARK;
228 return;
229 }
230 break;
231 case 'h':
232 if (small_compare<"hash">(tag)) {
233 state = HASH;
234 return;
235 }
236 break;
237 }
238 break;
239 }
240 case DUMP_REMARK:
241 if (small_compare<"text">(tag)) {
242 state = DUMP_TEXT;
243 return;
244 }
245 break;
246 case SYSTEM:
247 case TITLE:
248 case COMPANY:
249 case YEAR:
250 case COUNTRY:
251 case GENMSXID:
252 case ORIGINAL:
253 case TYPE:
254 case START:
255 case HASH:
256 case DUMP_TEXT:
257 break;
258
259 case END:
260 throw MSXException("Unexpected opening tag: ", tag);
261
262 default:
264 }
265
266 ++unknownLevel;
267}
268
269void DBParser::attribute(string_view name, string_view value)
270{
271 if (unknownLevel) return;
272
273 switch (state) {
274 case ORIGINAL:
275 if (small_compare<"value">(name)) {
276 dumps.back().origValue = StringOp::stringToBool(value);
277 }
278 break;
279 case HASH:
280 case BEGIN:
281 case SOFTWAREDB:
282 case SOFTWARE:
283 case SYSTEM:
284 case TITLE:
285 case COMPANY:
286 case YEAR:
287 case COUNTRY:
288 case GENMSXID:
289 case DUMP_REMARK:
290 case DUMP_TEXT:
291 case DUMP:
292 case ROM:
293 case TYPE:
294 case START:
295 case END:
296 break;
297 default:
299 }
300}
301
302void DBParser::text(string_view txt)
303{
304 if (unknownLevel) return;
305
306 switch (state) {
307 case SYSTEM:
308 system = txt;
309 break;
310 case TITLE:
311 title = cIndex(txt);
312 break;
313 case COMPANY:
314 company = cIndex(txt);
315 break;
316 case YEAR:
317 year = cIndex(txt);
318 break;
319 case COUNTRY:
320 country = cIndex(txt);
321 break;
322 case GENMSXID: {
323 auto g = StringOp::stringToBase<10, unsigned>(txt);
324 if (!g) {
325 cliComm.printWarning(
326 "Ignoring bad Generation MSX id (genmsxid) "
327 "in entry with title '", fromString32(bufStart, title),
328 ": ", txt);
329 }
330 genMSXid = *g;
331 break;
332 }
333 case ORIGINAL:
334 dumps.back().origData = cIndex(txt);
335 break;
336 case TYPE:
337 type = txt;
338 break;
339 case START:
340 startVal = txt;
341 break;
342 case HASH:
343 try {
344 dumps.back().hash = Sha1Sum(txt);
345 } catch (MSXException& e) {
346 cliComm.printWarning(
347 "Ignoring bad dump for '", fromString32(bufStart, title),
348 "': ", e.getMessage());
349 }
350 break;
351 case DUMP_REMARK:
352 case DUMP_TEXT:
353 dumps.back().remark = cIndex(txt);
354 break;
355 case BEGIN:
356 case SOFTWAREDB:
357 case SOFTWARE:
358 case DUMP:
359 case ROM:
360 case END:
361 break;
362 default:
364 }
365}
366
367String32 DBParser::cIndex(string_view str)
368{
369 auto* begin = const_cast<char*>(str.data());
370 auto* end = begin + str.size();
371 *end = 0;
372 String32 result;
373 toString32(bufStart, begin, result);
374 return result;
375}
376
377// called on </software>
378void DBParser::addEntries()
379{
380 append(db, view::transform(dumps, [&](auto& d) {
381 return RomDatabase::Entry{
382 d.hash,
383 RomInfo(title, year, company, country, d.origValue,
384 d.origData, d.remark, d.type, genMSXid)};
385 }));
386}
387
388// called on </softwaredb>
389void DBParser::addAllEntries()
390{
391 // Calculate boundary between old and new entries.
392 // old: [first, mid) already sorted, no duplicates
393 // new: [mid, last) not yet sorted, may have duplicates
394 // there may also be duplicates between old and new
395 const auto first = begin(db);
396 const auto last = end (db);
397 const auto mid = first + initialSize;
398 if (mid == last) return; // no new entries
399
400 // Sort new entries, old entries are already sorted.
401 ranges::sort(mid, last, {}, &RomDatabase::Entry::sha1);
402
403 // Filter duplicates from new entries. This is similar to the
404 // unique() algorithm, except that it also warns about duplicates.
405 auto it1 = mid;
406 auto it2 = mid + 1;
407 // skip initial non-duplicates
408 while (it2 != last) {
409 if (it1->sha1 == it2->sha1) break;
410 ++it1; ++it2;
411 }
412 // move non-duplicates up
413 while (it2 != last) {
414 if (it1->sha1 == it2->sha1) {
415 cliComm.printWarning(
416 "duplicate softwaredb entry SHA1: ",
417 it2->sha1.toString());
418 } else {
419 ++it1;
420 *it1 = std::move(*it2);
421 }
422 ++it2;
423 }
424 // actually erase the duplicates (typically none)
425 db.erase(it1 + 1, last);
426 // At this point both old and new entries are sorted and unique. But
427 // there may still be duplicates between old and new.
428
429 // Merge new and old entries. This is similar to the inplace_merge()
430 // algorithm, except that duplicates (between old and new) are removed.
431 if (first == mid) return; // no old entries (common case)
432 RomDatabase::RomDB result;
433 result.reserve(db.size());
434 it1 = first;
435 it2 = mid;
436 // while both new and old still have elements
437 while (it1 != mid && it2 != last) {
438 if (it1->sha1 < it2->sha1) {
439 result.push_back(std::move(*it1));
440 ++it1;
441 } else {
442 if (it1->sha1 != it2->sha1) { // *it2 < *it1
443 result.push_back(std::move(*it2));
444 ++it2;
445 } else {
446 // pick old entry, silently ignore new
447 result.push_back(std::move(*it1));
448 ++it1; ++it2;
449 }
450 }
451 }
452 // move remaining old or new entries (one of these is empty)
453 move(it1, mid, back_inserter(result));
454 move(it2, last, back_inserter(result));
455
456 // make result the new current database
457 swap(result, db);
458}
459
/**
 * Locate the digits of a start-address string.
 *
 * We expect "0x0000", "0x4000", "0x8000", "0xc000" or "".
 *
 * @return Pointer to the 4 characters following the "0x" prefix when 's' is
 *         exactly 6 characters long and starts with "0x", nullptr otherwise.
 *         The characters themselves are not validated.
 */
static const char* parseStart(string_view s)
{
	if (s.size() != 6) return nullptr;
	if (s.substr(0, 2) != "0x") return nullptr;
	return s.data() + 2;
}
465
467{
468 if (unknownLevel) {
469 --unknownLevel;
470 return;
471 }
472
473 switch (state) {
474 case SOFTWAREDB:
475 addAllEntries();
476 state = END;
477 break;
478 case SOFTWARE:
479 addEntries();
480 state = SOFTWAREDB;
481 break;
482 case SYSTEM:
483 case TITLE:
484 case COMPANY:
485 case YEAR:
486 case COUNTRY:
487 case GENMSXID:
488 state = SOFTWARE;
489 break;
490 case DUMP:
491 if (dumps.back().hash.empty()) {
492 // no sha1 sum specified, drop this dump
493 dumps.pop_back();
494 }
495 state = SOFTWARE;
496 break;
497 case ORIGINAL:
498 state = DUMP;
499 break;
500 case ROM: {
501 string_view t = type;
502 char buf[12];
503 if (small_compare<"Mirrored">(t)) {
504 if (const char* s = parseStart(startVal)) {
505 memcpy(buf, t.data(), 8);
506 memcpy(buf + 8, s, 4);
507 t = string_view(buf, 12);
508 }
509 } else if (small_compare<"Normal">(t)) {
510 if (const char* s = parseStart(startVal)) {
511 memcpy(buf, t.data(), 6);
512 memcpy(buf + 6, s, 4);
513 t = string_view(buf, 10);
514 }
515 }
517 if (romType == ROM_UNKNOWN) {
518 unknownTypes[std::string(t)]++;
519 }
520 dumps.back().type = romType;
521 state = DUMP;
522 break;
523 }
524 case TYPE:
525 case START:
526 case HASH:
527 case DUMP_REMARK:
528 state = ROM;
529 break;
530 case DUMP_TEXT:
531 state = DUMP_REMARK;
532 break;
533 case BEGIN:
534 case END:
535 throw MSXException("Unexpected closing tag");
536
537 default:
539 }
540}
541
542void DBParser::doctype(string_view txt)
543{
544 auto pos1 = txt.find(" SYSTEM \"");
545 if (pos1 == string_view::npos) return;
546 auto t = txt.substr(pos1 + 9);
547 auto pos2 = t.find('"');
548 if (pos2 == string_view::npos) return;
549 systemID = t.substr(0, pos2);
550}
551
552static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
553 RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
554{
555 DBParser handler(db, unknownTypes, cliComm, bufStart);
556 rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
557
558 if (handler.getSystemID() != "softwaredb1.dtd") {
560 "Missing or wrong systemID.\n"
561 "You're probably using an old incompatible file format.",
562 nullptr);
563 }
564}
565
567{
568 db.reserve(3500);
569 UnknownTypes unknownTypes;
570 // first user- then system-directory
571 std::vector<File> files;
572 size_t bufferSize = 0;
573 for (const auto& p : systemFileContext().getPaths()) {
574 try {
575 auto& f = files.emplace_back(p + "/softwaredb.xml");
576 bufferSize += f.getSize() + rapidsax::EXTRA_BUFFER_SPACE;
577 } catch (MSXException& /*e*/) {
578 // Ignore. It's not unusual the DB in the user
579 // directory is not found. In case there's an error
580 // with both user and system DB, we must give a
581 // warning, but that's done below.
582 }
583 }
584 buffer.resize(bufferSize);
585 size_t bufferOffset = 0;
586 for (auto& file : files) {
587 try {
588 auto size = file.getSize();
589 auto* buf = &buffer[bufferOffset];
590 bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE;
591 file.read(buf, size);
592 buf[size] = 0;
593
594 parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
595 } catch (rapidsax::ParseError& e) {
596 cliComm.printWarning(
597 "Rom database parsing failed: ", e.what());
598 } catch (MSXException& /*e*/) {
599 // Ignore, see above
600 }
601 }
602 if (bufferSize) buffer[0] = 0;
603 if (db.empty()) {
604 cliComm.printWarning(
605 "Couldn't load software database.\n"
606 "This may cause incorrect ROM mapper types to be used.");
607 }
608 if (!unknownTypes.empty()) {
609 std::string output = "Unknown mapper types in software database: ";
610 for (const auto& [type, count] : unknownTypes) {
611 strAppend(output, type, " (", count, "x); ");
612 }
613 cliComm.printWarning(output);
614 }
615}
616
617const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
618{
619 auto it = ranges::lower_bound(db, sha1sum, {}, &Entry::sha1);
620 return ((it != end(db)) && (it->sha1 == sha1sum))
621 ? &it->romInfo : nullptr;
622}
623
624} // namespace openmsx
int g
constexpr const char * fromString32(const char *buffer, uint32_t str32)
Definition: String32.hh:35
constexpr void toString32(const char *buffer, const char *str, uint32_t &result)
Definition: String32.hh:25
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition: String32.hh:22
TclObject t
bool empty() const
Definition: hash_set.hh:524
void printWarning(std::string_view message)
Definition: CliComm.cc:10
void doctype(string_view txt)
Definition: RomDatabase.cc:542
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
Definition: RomDatabase.cc:26
void attribute(string_view name, string_view value)
Definition: RomDatabase.cc:269
void start(string_view tag)
Definition: RomDatabase.cc:103
string_view getSystemID() const
Definition: RomDatabase.cc:45
void text(string_view txt)
Definition: RomDatabase.cc:302
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:111
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
RomDatabase(CliComm &cliComm)
Definition: RomDatabase.cc:566
std::vector< Entry > RomDB
Definition: RomDatabase.hh:20
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
Definition: RomDatabase.cc:617
static RomType nameToRomType(std::string_view name)
Definition: RomInfo.cc:174
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:22
ALWAYS_INLINE unsigned count(const uint8_t *pIn, const uint8_t *pMatch, const uint8_t *pInLimit)
Definition: lz4.cc:146
constexpr double e
Definition: Math.hh:18
bool stringToBool(string_view str)
Definition: StringOp.cc:12
This file implemented 3 utility functions:
Definition: Autofire.cc:9
const FileContext & systemFileContext()
Definition: FileContext.cc:155
hash_map< std::string, unsigned, XXHasher > UnknownTypes
Definition: RomDatabase.cc:21
@ ROM_UNKNOWN
Definition: RomTypes.hh:90
constexpr void sort(RandomAccessRange &&range)
Definition: ranges.hh:33
auto lower_bound(ForwardRange &&range, const T &value, Compare comp={}, Proj proj={})
Definition: ranges.hh:99
constexpr size_t EXTRA_BUFFER_SPACE
Definition: rapidsax.hh:41
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
size_t size(std::string_view utf8)
constexpr auto transform(Range &&range, UnaryOp op)
Definition: view.hh:378
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:627
#define UNREACHABLE
Definition: unreachable.hh:38
constexpr auto begin(const zstring_view &x)
constexpr auto end(const zstring_view &x)