openMSX
serialize.cc
Go to the documentation of this file.
1#include "serialize.hh"
2
3#include "Base64.hh"
4#include "HexDump.hh"
5#include "XMLElement.hh"
6#include "XMLException.hh"
7#include "DeltaBlock.hh"
8#include "MemBuffer.hh"
9#include "FileOperations.hh"
10#include "Version.hh"
11#include "Date.hh"
12#include "narrow.hh"
13#include "one_of.hh"
14#include "stl.hh"
15#include "build-info.hh"
16
17#include <bit>
18#include "cstdiop.hh" // for dup()
19#include <cstdint>
20#include <cstring>
21#include <iostream>
22#include <limits>
23
24using std::string;
25using std::string_view;
26
27namespace openmsx {
28
29template<typename Derived>
30void ArchiveBase<Derived>::attribute(const char* name, const char* value)
31{
32 string valueStr(value);
33 self().attribute(name, valueStr);
34}
37
39
40unsigned OutputArchiveBase2::generateID1(const void* p)
41{
42 #ifdef linux
43 assert("Can't serialize ID of object located on the stack" &&
44 !addressOnStack(p));
45 #endif
46 ++lastId;
47 assert(!polyIdMap.contains(p));
48 polyIdMap.emplace_noDuplicateCheck(p, lastId);
49 return lastId;
50}
51unsigned OutputArchiveBase2::generateID2(
52 const void* p, const std::type_info& typeInfo)
53{
54 #ifdef linux
55 assert("Can't serialize ID of object located on the stack" &&
56 !addressOnStack(p));
57 #endif
58 ++lastId;
59 auto key = std::pair(p, std::type_index(typeInfo));
60 assert(!idMap.contains(key));
61 idMap.emplace_noDuplicateCheck(key, lastId);
62 return lastId;
63}
64
65unsigned OutputArchiveBase2::getID1(const void* p)
66{
67 const auto* v = lookup(polyIdMap, p);
68 return v ? *v : 0;
69}
70unsigned OutputArchiveBase2::getID2(
71 const void* p, const std::type_info& typeInfo)
72{
73 const auto* v = lookup(idMap, std::pair(p, std::type_index(typeInfo)));
74 return v ? *v : 0;
75}
76
77
78template<typename Derived>
80 const char* tag, std::span<const uint8_t> data, bool /*diff*/)
81{
82 string encoding;
83 string tmp;
84 if (false) {
85 // useful for debugging
86 encoding = "hex";
87 tmp = HexDump::encode(data);
88 } else if (false) {
89 encoding = "base64";
90 tmp = Base64::encode(data);
91 } else {
92 encoding = "gz-base64";
93 // TODO check for overflow?
94 auto len = data.size();
95 auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
96 MemBuffer<uint8_t> buf(dstLen);
97 if (compress2(buf.data(), &dstLen,
98 std::bit_cast<const Bytef*>(data.data()),
99 uLong(len), 9)
100 != Z_OK) {
101 throw MSXException("Error while compressing blob.");
102 }
103 tmp = Base64::encode(buf.first(dstLen));
104 }
105 this->self().beginTag(tag);
106 this->self().attribute("encoding", encoding);
107 Saver<string> saver;
108 saver(this->self(), tmp, false);
109 this->self().endTag(tag);
110}
111
114
116
118{
119 auto* v = lookup(idMap, id);
120 return v ? *v : nullptr;
121}
122
123void InputArchiveBase2::addPointer(unsigned id, const void* p)
124{
125 assert(!idMap.contains(id));
126 idMap.emplace_noDuplicateCheck(id, const_cast<void*>(p));
127}
128
129unsigned InputArchiveBase2::getId(const void* ptr) const
130{
131 for (const auto& [id, pt] : idMap) {
132 if (pt == ptr) return id;
133 }
134 return 0;
135}
136
137template<typename Derived>
139 const char* tag, std::span<uint8_t> data, bool /*diff*/)
140{
141 this->self().beginTag(tag);
142 string encoding;
143 this->self().attribute("encoding", encoding);
144
145 string_view tmp = this->self().loadStr();
146 this->self().endTag(tag);
147
148 if (encoding == "gz-base64") {
149 auto buf = Base64::decode(tmp);
150 auto dstLen = uLongf(data.size()); // TODO check for overflow?
151 if ((uncompress(std::bit_cast<Bytef*>(data.data()), &dstLen,
152 std::bit_cast<const Bytef*>(buf.data()), uLong(buf.size()))
153 != Z_OK) ||
154 (dstLen != data.size())) {
155 throw MSXException("Error while decompressing blob.");
156 }
157 } else if (encoding == one_of("hex", "base64")) {
158 bool ok = (encoding == "hex")
159 ? HexDump::decode_inplace(tmp, data)
160 : Base64 ::decode_inplace(tmp, data);
161 if (!ok) {
162 throw XMLException(
163 "Length of decoded blob different from "
164 "expected value (", data.size(), ')');
165 }
166 } else {
167 throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
168 }
169}
170
173
175
176void MemOutputArchive::save(std::string_view s)
177{
178 auto size = s.size();
179 auto buf = buffer.allocate(sizeof(size) + size);
180 memcpy(buf.data(), &size, sizeof(size));
181 ranges::copy(s, subspan(buf, sizeof(size)));
182}
183
185
186void MemInputArchive::load(std::string& s)
187{
188 size_t length;
189 load(length);
190 s.resize(length);
191 if (length) {
192 buffer.read(s.data(), length);
193 }
194}
195
197{
198 size_t length;
199 load(length);
200 const uint8_t* p = buffer.getCurrentPos();
201 buffer.skip(length);
202 return {std::bit_cast<const char*>(p), length};
203}
204
206
// Very small inputs don't compress well (the compressed result is often even
// bigger than the input) and compressing them is relatively slow (the setup
// cost of the compression dominates). The threshold below was chosen
// semi-arbitrarily; the only requirement was that it is >= 52, so that the
// (incompressible) RP5C01 registers are not compressed.
static constexpr size_t SMALL_SIZE{64};
213void MemOutputArchive::serialize_blob(const char* /*tag*/, std::span<const uint8_t> data,
214 bool diff)
215{
216 // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
217 if (data.size() > SMALL_SIZE) {
218 auto deltaBlockIdx = unsigned(deltaBlocks.size());
219 save(deltaBlockIdx); // see comment below in MemInputArchive
220 deltaBlocks.push_back(diff
221 ? lastDeltaBlocks.createNew(data.data(), data)
222 : lastDeltaBlocks.createNullDiff(data.data(), data));
223 } else {
224 auto buf = buffer.allocate(data.size());
225 ranges::copy(data, buf);
226 }
227}
228
229void MemInputArchive::serialize_blob(const char* /*tag*/, std::span<uint8_t> data,
230 bool /*diff*/)
231{
232 if (data.size() > SMALL_SIZE) {
233 // Usually blobs are saved in the same order as they are loaded
234 // (via the serialize_blob() methods in respectively
235 // MemOutputArchive and MemInputArchive). In that case keeping
236 // track of the deltaBlockIdx in the savestate itself is
237 // redundant (it will simply be an increasing value). However
238 // in rare cases, via the {begin,end,skip)Section() methods, it
239 // is possible that certain blobs are stored in the savestate,
240 // but skipped while loading. That's why we do need the index.
241 unsigned deltaBlockIdx; load(deltaBlockIdx);
242 deltaBlocks[deltaBlockIdx]->apply(data);
243 } else {
244 ranges::copy(std::span{buffer.getCurrentPos(), data.size()}, data);
245 buffer.skip(data.size());
246 }
247}
248
250
252 : filename(filename_)
253 , writer(*this)
254{
255 {
256 auto f = FileOperations::openFile(filename, "wb");
257 if (!f) error();
258 int duped_fd = dup(fileno(f.get()));
259 if (duped_fd == -1) error();
260 file = gzdopen(duped_fd, "wb9");
261 if (!file) {
262 ::close(duped_fd);
263 error();
264 }
265 // on scope-exit 'f' is closed, and 'file'
266 // uses the dup()'ed file descriptor.
267 }
268
269 static constexpr std::string_view header =
270 "<?xml version=\"1.0\" ?>\n"
271 "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
272 write(header);
273
274 writer.begin("serial");
275 writer.attribute("openmsx_version", Version::full());
276 writer.attribute("date_time", Date::toString(time(nullptr)));
277 writer.attribute("platform", TARGET_PLATFORM);
278}
279
281{
282 if (!file) return; // already closed
283
284 writer.end("serial");
285
286 if (gzclose(file) != Z_OK) {
287 error();
288 }
289 file = nullptr;
290}
291
293{
294 try {
295 close();
296 } catch (...) {
297 // Eat exception. Explicitly call close() if you want to handle errors.
298 }
299}
300
301void XmlOutputArchive::write(std::span<const char> buf)
302{
303 if ((gzwrite(file, buf.data(), unsigned(buf.size())) == 0) && !buf.empty()) {
304 error();
305 }
306}
307
309{
310 if (gzputc(file, c) == -1) {
311 error();
312 }
313}
314
315void XmlOutputArchive::check(bool condition) const
316{
317 assert(condition); (void)condition;
318}
319
321{
322 if (file) {
323 gzclose(file);
324 file = nullptr;
325 }
326 throw XMLException("could not write \"", filename, '"');
327}
328
330{
331 writer.data(std::string_view(&c, 1));
332}
333void XmlOutputArchive::save(std::string_view str)
334{
335 writer.data(str);
336}
338{
339 writer.data(b ? "true" : "false");
340}
341void XmlOutputArchive::save(unsigned char b)
342{
343 save(unsigned(b));
344}
345void XmlOutputArchive::save(signed char c)
346{
347 save(int(c));
348}
350{
351 save(int(c));
352}
354{
355 saveImpl(i);
356}
358{
359 saveImpl(u);
360}
361void XmlOutputArchive::save(unsigned long long ull)
362{
363 saveImpl(ull);
364}
365
366void XmlOutputArchive::attribute(const char* name, std::string_view str)
367{
368 writer.attribute(name, str);
369}
370void XmlOutputArchive::attribute(const char* name, int i)
371{
372 attributeImpl(name, i);
373}
374void XmlOutputArchive::attribute(const char* name, unsigned u)
375{
376 attributeImpl(name, u);
377}
378
379void XmlOutputArchive::beginTag(const char* tag)
380{
381 writer.begin(tag);
382}
383void XmlOutputArchive::endTag(const char* tag)
384{
385 writer.end(tag);
386}
387
389
390XmlInputArchive::XmlInputArchive(const string& filename)
391{
392 xmlDoc.load(filename, "openmsx-serialize.dtd");
393 const auto* root = xmlDoc.getRoot();
394 elems.emplace_back(root, root->getFirstChild());
395}
396
397string_view XmlInputArchive::loadStr() const
398{
399 if (currentElement()->hasChildren()) {
400 throw XMLException("No child tags expected for primitive type");
401 }
402 return currentElement()->getData();
403}
404void XmlInputArchive::load(string& t) const
405{
406 t = loadStr();
407}
408void XmlInputArchive::loadChar(char& c) const
409{
410 std::string str;
411 load(str);
412 std::istringstream is(str);
413 is >> c;
414}
415void XmlInputArchive::load(bool& b) const
416{
417 string_view s = loadStr();
418 if (s == one_of("true", "1")) {
419 b = true;
420 } else if (s == one_of("false", "0")) {
421 b = false;
422 } else {
423 throw XMLException("Bad value found for boolean: ", s);
424 }
425}
426
427// This function parses a number from a string. It's similar to the generic
428// templatized XmlInputArchive::load() method, but _much_ faster. It does
429// have some limitations though:
430// - it can't handle leading whitespace
431// - it can't handle extra characters at the end of the string
432// - it can only handle one base (only decimal, not octal or hexadecimal)
433// - it doesn't understand a leading '+' sign
434// - it doesn't detect overflow or underflow (The generic implementation sets
435// a 'bad' flag on the stream and clips the result to the min/max allowed
436// value. Though this 'bad' flag was ignored by the openMSX code).
437// This routine is only used to parse strings we've written ourselves (and the
438// savestate/replay XML files are not meant to be manually edited). So the
439// above limitations don't really matter. And we can use the speed gain.
440template<std::integral T> static inline void fastAtoi(string_view str, T& t)
441{
442 t = 0;
443 bool neg = false;
444 size_t i = 0;
445 size_t l = str.size();
446
447 if constexpr (std::numeric_limits<T>::is_signed) {
448 if (l == 0) return;
449 if (str[0] == '-') {
450 neg = true;
451 i = 1;
452 }
453 }
454 for (; i < l; ++i) {
455 unsigned d = str[i] - '0';
456 if (d > 9) [[unlikely]] {
457 throw XMLException("Invalid integer: ", str);
458 }
459 t = 10 * t + d;
460 }
461 if constexpr (std::numeric_limits<T>::is_signed) {
462 if (neg) t = -t;
463 } else {
464 assert(!neg); (void)neg;
465 }
466}
467void XmlInputArchive::load(int& i) const
468{
469 string_view str = loadStr();
470 fastAtoi(str, i);
471}
472void XmlInputArchive::load(unsigned& u) const
473{
474 string_view str = loadStr();
475 try {
476 fastAtoi(str, u);
477 } catch (XMLException&) {
478 // One reason could be that the type of a member was corrected
479 // from 'int' to 'unsigned'. In that case loading an old
480 // savestate (that contains a negative value) might fail. So try
481 // again parsing as an 'int'.
482 int i;
483 fastAtoi(str, i);
484 u = narrow_cast<unsigned>(i);
485 }
486}
487void XmlInputArchive::load(unsigned long long& ull) const
488{
489 string_view str = loadStr();
490 fastAtoi(str, ull);
491}
492void XmlInputArchive::load(unsigned char& b) const
493{
494 unsigned u;
495 load(u);
496 b = narrow_cast<unsigned char>(u);
497}
498void XmlInputArchive::load(signed char& c) const
499{
500 int i;
501 load(i);
502 c = narrow_cast<signed char>(i);
503}
504void XmlInputArchive::load(char& c) const
505{
506 int i;
507 load(i);
508 c = narrow_cast<char>(i);
509}
510
511void XmlInputArchive::beginTag(const char* tag)
512{
513 const auto* child = currentElement()->findChild(tag, elems.back().second);
514 if (!child) {
515 string path;
516 for (const auto& [e, _] : elems) {
517 strAppend(path, e->getName(), '/');
518 }
519 throw XMLException("No child tag \"", tag,
520 "\" found at location \"", path, '\"');
521 }
522 elems.emplace_back(child, child->getFirstChild());
523}
524void XmlInputArchive::endTag(const char* tag)
525{
526 const auto& elem = *currentElement();
527 if (elem.getName() != tag) {
528 throw XMLException("End tag \"", elem.getName(),
529 "\" not equal to begin tag \"", tag, "\"");
530 }
531 auto& elem2 = const_cast<XMLElement&>(elem);
532 elem2.clearName(); // mark this elem for later beginTag() calls
533 elems.pop_back();
534}
535
536void XmlInputArchive::attribute(const char* name, string& t) const
537{
538 const auto* attr = currentElement()->findAttribute(name);
539 if (!attr) {
540 throw XMLException("Missing attribute \"", name, "\".");
541 }
542 t = attr->getValue();
543}
544void XmlInputArchive::attribute(const char* name, int& i) const
545{
546 attributeImpl(name, i);
547}
548void XmlInputArchive::attribute(const char* name, unsigned& u) const
549{
550 attributeImpl(name, u);
551}
552bool XmlInputArchive::hasAttribute(const char* name) const
553{
554 return currentElement()->findAttribute(name);
555}
557{
558 return int(currentElement()->numChildren());
559}
560
561} // namespace openmsx
uintptr_t id
TclObject t
bool contains(const K &k) const
Definition hash_map.hh:110
iterator emplace_noDuplicateCheck(Args &&... args)
Definition hash_set.hh:472
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition serialize.hh:248
void * getPointer(unsigned id)
Definition serialize.cc:117
unsigned getId(const void *p) const
Definition serialize.cc:129
void addPointer(unsigned id, const void *p)
Definition serialize.cc:123
void serialize_blob(const char *tag, std::span< uint8_t > data, bool diff=true)
Definition serialize.cc:138
void read(void *result, size_t len)
Read the given number of bytes.
const uint8_t * getCurrentPos() const
Return a pointer to the current position in the buffer.
void skip(size_t len)
Skip the given number of bytes.
std::shared_ptr< DeltaBlock > createNew(const void *id, std::span< const uint8_t > data)
std::shared_ptr< DeltaBlock > createNullDiff(const void *id, std::span< const uint8_t > data)
This class manages the lifetime of a block of memory.
Definition MemBuffer.hh:32
std::span< const T > first(size_t n) const
Definition MemBuffer.hh:127
const T * data() const
Returns pointer to the start of the memory buffer.
Definition MemBuffer.hh:79
void serialize_blob(const char *tag, std::span< uint8_t > data, bool diff=true)
Definition serialize.cc:229
std::string_view loadStr()
Definition serialize.cc:196
void save(const T &t)
Definition serialize.hh:674
void serialize_blob(const char *tag, std::span< const uint8_t > data, bool diff=true)
Definition serialize.cc:213
void serialize_blob(const char *tag, std::span< const uint8_t > data, bool diff=true)
Definition serialize.cc:79
std::span< uint8_t > allocate(size_t len)
Reserve space to insert the given number of bytes.
unsigned size() const
Definition TclObject.hh:179
static std::string full()
Definition Version.cc:8
const XMLElement * getRoot() const
void load(const std::string &filename, std::string_view systemID)
const XMLAttribute * findAttribute(std::string_view attrName) const
Definition XMLElement.cc:96
const XMLElement * findChild(std::string_view childName) const
Definition XMLElement.cc:21
std::string_view getData() const
void attribute(const char *name, T &t)
Definition serialize.hh:985
bool hasAttribute(const char *name) const
Definition serialize.cc:552
void load(bool &b) const
Definition serialize.cc:415
void endTag(const char *tag)
Definition serialize.cc:524
XmlInputArchive(const std::string &filename)
Definition serialize.cc:390
const XMLElement * currentElement() const
Definition serialize.hh:966
std::string_view loadStr() const
Definition serialize.cc:397
void attributeImpl(const char *name, T &t) const
Definition serialize.hh:978
void loadChar(char &c) const
Definition serialize.cc:408
void beginTag(const char *tag)
Definition serialize.cc:511
void saveImpl(const T &t)
Definition serialize.hh:853
void check(bool condition) const
Definition serialize.cc:315
void write(std::span< const char > buf)
Definition serialize.cc:301
void save(const T &t)
Definition serialize.hh:859
XmlOutputArchive(zstring_view filename)
Definition serialize.cc:251
void attributeImpl(const char *name, const T &t)
Definition serialize.hh:897
void endTag(const char *tag)
Definition serialize.cc:383
void beginTag(const char *tag)
Definition serialize.cc:379
void attribute(const char *name, const T &t)
Definition serialize.hh:901
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition hash_map.hh:118
bool decode_inplace(std::string_view input, std::span< uint8_t > output)
Definition HexDump.cc:76
std::string toString(time_t time)
Definition Date.cc:153
FILE_t openFile(zstring_view filename, zstring_view mode)
Call fopen() in a platform-independent manner.
This file implemented 3 utility functions:
Definition Autofire.cc:11
constexpr auto copy(InputRange &&range, OutputIter out)
Definition ranges.hh:252
constexpr auto subspan(Range &&range, size_t offset, size_t count=std::dynamic_extent)
Definition ranges.hh:481
void strAppend(std::string &result, Ts &&...ts)
Definition strCat.hh:752