openMSX
RomDatabase.cc
Go to the documentation of this file.
1#include "RomDatabase.hh"
2
3#include "CliComm.hh"
4#include "File.hh"
5#include "FileContext.hh"
6#include "MSXException.hh"
7
8#include "String32.hh"
9#include "StringOp.hh"
10#include "hash_map.hh"
11#include "narrow.hh"
12#include "ranges.hh"
13#include "rapidsax.hh"
14#include "stl.hh"
15#include "unreachable.hh"
16#include "view.hh"
17#include "xxhash.hh"
18
19#include <array>
20#include <cassert>
21#include <string_view>
22
23using std::string_view;
24
25namespace openmsx {
26
28
30{
31public:
33 CliComm& cliComm_, char* bufStart_)
34 : db(db_)
35 , unknownTypes(unknownTypes_)
36 , cliComm(cliComm_)
37 , bufStart(bufStart_)
38 , initialSize(db.size())
39 {
40 }
41
42 // rapidsax handler interface
43 void start(string_view tag);
44 void attribute(string_view name, string_view value);
45 void text(string_view txt);
46 void stop();
47 void doctype(string_view txt);
48
49 [[nodiscard]] string_view getSystemID() const { return systemID; }
50
51private:
52 [[nodiscard]] String32 cIndex(string_view str) const;
53 void addEntries();
54 void addAllEntries();
55
56 enum State {
57 BEGIN,
58 SOFTWAREDB,
59 SOFTWARE,
60 SYSTEM,
61 TITLE,
62 COMPANY,
63 YEAR,
64 COUNTRY,
65 GENMSXID,
66 DUMP_REMARK,
67 DUMP_TEXT,
68 DUMP,
69 ORIGINAL,
70 ROM,
71 TYPE,
72 START,
73 HASH,
74 END
75 };
76
77 struct Dump {
78 String32 remark;
79 Sha1Sum hash;
80 String32 origData;
81 RomType type;
82 bool origValue;
83 };
84
86 UnknownTypes& unknownTypes;
87 CliComm& cliComm;
88 char* bufStart;
89
90 string_view systemID;
91 string_view type;
92 string_view startVal;
93
94 std::vector<Dump> dumps;
95 string_view system;
96 String32 title;
97 String32 company;
98 String32 year;
99 String32 country;
100 unsigned genMSXid;
101
102 State state = BEGIN;
103 unsigned unknownLevel = 0;
104 size_t initialSize;
105};
106
107void DBParser::start(string_view tag)
108{
109 if (unknownLevel) {
110 ++unknownLevel;
111 return;
112 }
113
114 assert(!tag.empty()); // rapidsax will reject empty tags
115 switch (state) {
116 case BEGIN:
117 if (tag == "softwaredb") {
118 state = SOFTWAREDB;
119 return;
120 }
121 throw MSXException("Expected <softwaredb> as root tag.");
122 case SOFTWAREDB:
123 if (small_compare<"software">(tag)) {
124 system = string_view();
125 toString32(bufStart, bufStart, title);
126 toString32(bufStart, bufStart, company);
127 toString32(bufStart, bufStart, year);
128 toString32(bufStart, bufStart, country);
129 genMSXid = 0;
130 dumps.clear();
131 state = SOFTWARE;
132 return;
133 }
134 break;
135 case SOFTWARE: {
136 switch (tag.front()) {
137 case 's':
138 if (small_compare<"system">(tag)) {
139 state = SYSTEM;
140 return;
141 }
142 break;
143 case 't':
144 tag.remove_prefix(1);
145 if (small_compare<"itle">(tag)) {
146 state = TITLE;
147 return;
148 }
149 break;
150 case 'c':
151 if (small_compare<"company">(tag)) {
152 state = COMPANY;
153 return;
154 } else if (small_compare<"country">(tag)) {
155 state = COUNTRY;
156 return;
157 }
158 break;
159 case 'y':
160 if (small_compare<"year">(tag)) {
161 state = YEAR;
162 return;
163 }
164 break;
165 case 'g':
166 if (small_compare<"genmsxid">(tag)) {
167 state = GENMSXID;
168 return;
169 }
170 break;
171 case 'd':
172 if (small_compare<"dump">(tag)) {
173 dumps.resize(dumps.size() + 1);
174 dumps.back().type = RomType::UNKNOWN;
175 dumps.back().origValue = false;
176 toString32(bufStart, bufStart, dumps.back().remark);
177 toString32(bufStart, bufStart, dumps.back().origData);
178 state = DUMP;
179 return;
180 }
181 break;
182 }
183 break;
184 }
185 case DUMP: {
186 switch (tag.front()) {
187 case 'o':
188 if (small_compare<"original">(tag)) {
189 dumps.back().origValue = false;
190 state = ORIGINAL;
191 return;
192 }
193 break;
194 case 'm':
195 if (small_compare<"megarom">(tag)) {
196 type = string_view();
197 startVal = string_view();
198 state = ROM;
199 return;
200 }
201 break;
202 case 'r':
203 tag.remove_prefix(1);
204 if (small_compare<"om">(tag)) {
205 type = "Mirrored";
206 startVal = string_view();
207 state = ROM;
208 return;
209 }
210 break;
211 }
212 break;
213 }
214 case ROM: {
215 switch (tag.front()) {
216 case 't':
217 if (small_compare<"type">(tag)) {
218 state = TYPE;
219 return;
220 }
221 break;
222 case 's':
223 tag.remove_prefix(1);
224 if (small_compare<"tart">(tag)) {
225 state = START;
226 return;
227 }
228 break;
229 case 'r':
230 if (small_compare<"remark">(tag)) {
231 state = DUMP_REMARK;
232 return;
233 }
234 break;
235 case 'h':
236 if (small_compare<"hash">(tag)) {
237 state = HASH;
238 return;
239 }
240 break;
241 }
242 break;
243 }
244 case DUMP_REMARK:
245 if (small_compare<"text">(tag)) {
246 state = DUMP_TEXT;
247 return;
248 }
249 break;
250 case SYSTEM:
251 case TITLE:
252 case COMPANY:
253 case YEAR:
254 case COUNTRY:
255 case GENMSXID:
256 case ORIGINAL:
257 case TYPE:
258 case START:
259 case HASH:
260 case DUMP_TEXT:
261 break;
262
263 case END:
264 throw MSXException("Unexpected opening tag: ", tag);
265
266 default:
268 }
269
270 ++unknownLevel;
271}
272
273void DBParser::attribute(string_view name, string_view value)
274{
275 if (unknownLevel) return;
276
277 switch (state) {
278 case ORIGINAL:
279 if (small_compare<"value">(name)) {
280 dumps.back().origValue = StringOp::stringToBool(value);
281 }
282 break;
283 case HASH:
284 case BEGIN:
285 case SOFTWAREDB:
286 case SOFTWARE:
287 case SYSTEM:
288 case TITLE:
289 case COMPANY:
290 case YEAR:
291 case COUNTRY:
292 case GENMSXID:
293 case DUMP_REMARK:
294 case DUMP_TEXT:
295 case DUMP:
296 case ROM:
297 case TYPE:
298 case START:
299 case END:
300 break;
301 default:
303 }
304}
305
306void DBParser::text(string_view txt)
307{
308 if (unknownLevel) return;
309
310 switch (state) {
311 case SYSTEM:
312 system = txt;
313 break;
314 case TITLE:
315 title = cIndex(txt);
316 break;
317 case COMPANY:
318 company = cIndex(txt);
319 break;
320 case YEAR:
321 year = cIndex(txt);
322 break;
323 case COUNTRY:
324 country = cIndex(txt);
325 break;
326 case GENMSXID: {
327 if (auto g = StringOp::stringToBase<10, unsigned>(txt)) {
328 genMSXid = *g;
329 } else {
330 cliComm.printWarning(
331 "Ignoring bad Generation MSX id (genmsxid) "
332 "in entry with title '", fromString32(bufStart, title),
333 ": ", txt);
334 }
335 break;
336 }
337 case ORIGINAL:
338 dumps.back().origData = cIndex(txt);
339 break;
340 case TYPE:
341 type = txt;
342 break;
343 case START:
344 startVal = txt;
345 break;
346 case HASH:
347 try {
348 dumps.back().hash = Sha1Sum(txt);
349 } catch (MSXException& e) {
350 cliComm.printWarning(
351 "Ignoring bad dump for '", fromString32(bufStart, title),
352 "': ", e.getMessage());
353 }
354 break;
355 case DUMP_REMARK:
356 case DUMP_TEXT:
357 dumps.back().remark = cIndex(txt);
358 break;
359 case BEGIN:
360 case SOFTWAREDB:
361 case SOFTWARE:
362 case DUMP:
363 case ROM:
364 case END:
365 break;
366 default:
368 }
369}
370
371String32 DBParser::cIndex(string_view str) const
372{
373 auto* begin = const_cast<char*>(str.data());
374 auto* end = begin + str.size();
375 *end = 0;
376 String32 result;
377 toString32(bufStart, begin, result);
378 return result;
379}
380
381// called on </software>
382void DBParser::addEntries()
383{
384 append(db, view::transform(dumps, [&](auto& d) {
385 return RomDatabase::Entry{
386 d.hash,
387 RomInfo(title, year, company, country, d.origValue,
388 d.origData, d.remark, d.type, genMSXid)};
389 }));
390}
391
392// called on </softwaredb>
393void DBParser::addAllEntries()
394{
395 // Calculate boundary between old and new entries.
396 // old: [first, mid) already sorted, no duplicates
397 // new: [mid, last) not yet sorted, may have duplicates
398 // there may also be duplicates between old and new
399 const auto first = begin(db);
400 const auto last = end (db);
401 const auto mid = first + narrow<ptrdiff_t>(initialSize);
402 if (mid == last) return; // no new entries
403
404 // Sort new entries, old entries are already sorted.
405 ranges::sort(mid, last, {}, &RomDatabase::Entry::sha1);
406
407 // Filter duplicates from new entries. This is similar to the
408 // unique() algorithm, except that it also warns about duplicates.
409 auto it1 = mid;
410 auto it2 = mid + 1;
411 // skip initial non-duplicates
412 while (it2 != last) {
413 if (it1->sha1 == it2->sha1) break;
414 ++it1; ++it2;
415 }
416 // move non-duplicates up
417 while (it2 != last) {
418 if (it1->sha1 == it2->sha1) {
419 cliComm.printWarning(
420 "duplicate softwaredb entry SHA1: ",
421 it2->sha1.toString());
422 } else {
423 ++it1;
424 *it1 = std::move(*it2);
425 }
426 ++it2;
427 }
428 // actually erase the duplicates (typically none)
429 db.erase(it1 + 1, last);
430 // At this point both old and new entries are sorted and unique. But
431 // there may still be duplicates between old and new.
432
433 // Merge new and old entries. This is similar to the inplace_merge()
434 // algorithm, except that duplicates (between old and new) are removed.
435 if (first == mid) return; // no old entries (common case)
436 RomDatabase::RomDB result;
437 result.reserve(db.size());
438 it1 = first;
439 it2 = mid;
440 // while both new and old still have elements
441 while (it1 != mid && it2 != last) {
442 if (it1->sha1 < it2->sha1) {
443 result.push_back(std::move(*it1));
444 ++it1;
445 } else {
446 if (it1->sha1 != it2->sha1) { // *it2 < *it1
447 result.push_back(std::move(*it2));
448 ++it2;
449 } else {
450 // pick old entry, silently ignore new
451 result.push_back(std::move(*it1));
452 ++it1; ++it2;
453 }
454 }
455 }
456 // move remaining old or new entries (one of these is empty)
457 move(it1, mid, back_inserter(result));
458 move(it2, last, back_inserter(result));
459
460 // make result the new current database
461 swap(result, db);
462}
463
/** Locate the hexadecimal digits of a start-address string.
  *
  * We expect "0x0000", "0x4000", "0x8000", "0xc000" or "". Only the
  * shape is checked: exactly 6 characters, beginning with "0x".
  *
  * @return Pointer to the 4 characters behind the "0x" prefix, or
  *         nullptr when 's' does not have the expected shape.
  */
static const char* parseStart(std::string_view s)
{
	if (s.size() != 6) return nullptr;
	if (s.substr(0, 2) != "0x") return nullptr;
	return s.data() + 2;
}
469
471{
472 if (unknownLevel) {
473 --unknownLevel;
474 return;
475 }
476
477 switch (state) {
478 case SOFTWAREDB:
479 addAllEntries();
480 state = END;
481 break;
482 case SOFTWARE:
483 addEntries();
484 state = SOFTWAREDB;
485 break;
486 case SYSTEM:
487 case TITLE:
488 case COMPANY:
489 case YEAR:
490 case COUNTRY:
491 case GENMSXID:
492 state = SOFTWARE;
493 break;
494 case DUMP:
495 if (dumps.back().hash.empty()) {
496 // no sha1 sum specified, drop this dump
497 dumps.pop_back();
498 }
499 state = SOFTWARE;
500 break;
501 case ORIGINAL:
502 state = DUMP;
503 break;
504 case ROM: {
505 string_view t = type;
506 std::array<char, 8 + 4> buf;
507 if (small_compare<"Mirrored">(t)) {
508 if (const char* s = parseStart(startVal)) {
509 ranges::copy(t, subspan<8>(buf, 0));
510 ranges::copy(std::string_view(s, 4), subspan<4>(buf, 8));
511 t = string_view(buf.data(), 8 + 4);
512 }
513 } else if (small_compare<"Normal">(t)) {
514 if (const char* s = parseStart(startVal)) {
515 ranges::copy(t, subspan<6>(buf, 0));
516 ranges::copy(std::string_view(s, 4), subspan<4>(buf, 6));
517 t = string_view(buf.data(), 6 + 4);
518 }
519 }
521 if (romType == RomType::UNKNOWN) {
522 unknownTypes[std::string(t)]++;
523 }
524 dumps.back().type = romType;
525 state = DUMP;
526 break;
527 }
528 case TYPE:
529 case START:
530 case HASH:
531 case DUMP_REMARK:
532 state = ROM;
533 break;
534 case DUMP_TEXT:
535 state = DUMP_REMARK;
536 break;
537 case BEGIN:
538 case END:
539 throw MSXException("Unexpected closing tag");
540
541 default:
543 }
544}
545
546void DBParser::doctype(string_view txt)
547{
548 auto pos1 = txt.find(" SYSTEM \"");
549 if (pos1 == string_view::npos) return;
550 auto t = txt.substr(pos1 + 9);
551 auto pos2 = t.find('"');
552 if (pos2 == string_view::npos) return;
553 systemID = t.substr(0, pos2);
554}
555
556static void parseDB(CliComm& cliComm, char* buf, char* bufStart,
557 RomDatabase::RomDB& db, UnknownTypes& unknownTypes)
558{
559 DBParser handler(db, unknownTypes, cliComm, bufStart);
560 rapidsax::parse<rapidsax::trimWhitespace>(handler, buf);
561
562 if (handler.getSystemID() != "softwaredb1.dtd") {
564 "Missing or wrong systemID.\n"
565 "You're probably using an old incompatible file format.",
566 nullptr);
567 }
568}
569
571{
572 db.reserve(3500);
573 UnknownTypes unknownTypes;
574 // first user- then system-directory
575 std::vector<File> files;
576 size_t bufferSize = 0;
577 for (const auto& p : systemFileContext().getPaths()) {
578 try {
579 auto& f = files.emplace_back(p + "/softwaredb.xml");
580 bufferSize += f.getSize() + rapidsax::EXTRA_BUFFER_SPACE;
581 } catch (MSXException& /*e*/) {
582 // Ignore. It's not unusual the DB in the user
583 // directory is not found. In case there's an error
584 // with both user and system DB, we must give a
585 // warning, but that's done below.
586 }
587 }
588 buffer.resize(bufferSize);
589 size_t bufferOffset = 0;
590 for (auto& file : files) {
591 try {
592 auto size = file.getSize();
593 auto* buf = &buffer[bufferOffset];
594 bufferOffset += size + rapidsax::EXTRA_BUFFER_SPACE;
595 file.read(std::span{buf, size});
596 buf[size] = 0;
597
598 parseDB(cliComm, buf, buffer.data(), db, unknownTypes);
599 } catch (rapidsax::ParseError& e) {
600 cliComm.printWarning(
601 "Rom database parsing failed: ", e.what());
602 } catch (MSXException& /*e*/) {
603 // Ignore, see above
604 }
605 }
606 if (bufferSize) buffer[0] = 0;
607 if (db.empty()) {
608 cliComm.printWarning(
609 "Couldn't load software database.\n"
610 "This may cause incorrect ROM mapper types to be used.");
611 }
612 if (!unknownTypes.empty()) {
613 std::string output = "Unknown mapper types in software database: ";
614 for (const auto& [type, count] : unknownTypes) {
615 strAppend(output, type, " (", count, "x); ");
616 }
617 cliComm.printWarning(output);
618 }
619}
620
621const RomInfo* RomDatabase::fetchRomInfo(const Sha1Sum& sha1sum) const
622{
623 auto d = binary_find(db, sha1sum, {}, &Entry::sha1);
624 return d ? &d->romInfo : nullptr;
625}
626
627} // namespace openmsx
int g
constexpr const char * fromString32(const char *buffer, uint32_t str32)
Definition String32.hh:34
constexpr void toString32(const char *buffer, const char *str, uint32_t &result)
Definition String32.hh:25
std::conditional_t<(sizeof(char *) > sizeof(uint32_t)), uint32_t, const char * > String32
Definition String32.hh:22
TclObject t
bool empty() const
Definition hash_set.hh:521
void printWarning(std::string_view message)
Definition CliComm.cc:12
void doctype(string_view txt)
DBParser(RomDatabase::RomDB &db_, UnknownTypes &unknownTypes_, CliComm &cliComm_, char *bufStart_)
void attribute(string_view name, string_view value)
void start(string_view tag)
string_view getSystemID() const
void text(string_view txt)
std::span< const std::string > getPaths() const
void resize(size_t size)
Grow or shrink the memory block.
Definition MemBuffer.hh:156
const T * data() const
Returns pointer to the start of the memory buffer.
Definition MemBuffer.hh:79
RomDatabase(CliComm &cliComm)
std::vector< Entry > RomDB
const RomInfo * fetchRomInfo(const Sha1Sum &sha1sum) const
Lookup an entry in the database by sha1sum.
static RomType nameToRomType(std::string_view name)
Definition RomInfo.cc:181
This class represents the result of a sha1 calculation (a 160-bit value).
Definition sha1.hh:24
bool stringToBool(string_view str)
Definition StringOp.cc:16
This file implemented 3 utility functions:
Definition Autofire.cc:11
const FileContext & systemFileContext()
hash_map< std::string, unsigned, XXHasher > UnknownTypes
constexpr auto copy(InputRange &&range, OutputIter out)
Definition ranges.hh:252
constexpr void sort(RandomAccessRange &&range)
Definition ranges.hh:51
constexpr size_t EXTRA_BUFFER_SPACE
Definition rapidsax.hh:44
constexpr auto transform(Range &&range, UnaryOp op)
Definition view.hh:520
auto * binary_find(ForwardRange &&range, const T &value, Compare comp={}, Proj proj={})
Definition ranges.hh:448
void strAppend(std::string &result, Ts &&...ts)
Definition strCat.hh:752
#define UNREACHABLE
constexpr auto begin(const zstring_view &x)
constexpr auto end(const zstring_view &x)