openMSX
serialize.cc
Go to the documentation of this file.
1#include "serialize.hh"
2
3#include "Base64.hh"
4#include "HexDump.hh"
5#include "XMLElement.hh"
6#include "XMLException.hh"
7#include "DeltaBlock.hh"
8#include "MemBuffer.hh"
9#include "FileOperations.hh"
10#include "Version.hh"
11#include "Date.hh"
12#include "narrow.hh"
13#include "one_of.hh"
14#include "stl.hh"
15#include "build-info.hh"
16
17#include <bit>
18#include "cstdiop.hh" // for dup()
19#include <cstdint>
20#include <cstring>
21#include <iostream>
22#include <limits>
23
24using std::string;
25using std::string_view;
26
27namespace openmsx {
28
29template<typename Derived>
30void ArchiveBase<Derived>::attribute(const char* name, const char* value)
31{
32 string valueStr(value);
33 self().attribute(name, valueStr);
34}
37
39
40unsigned OutputArchiveBase2::generateID1(const void* p)
41{
42 #ifdef linux
43 assert("Can't serialize ID of object located on the stack" &&
44 !addressOnStack(p));
45 #endif
46 ++lastId;
47 assert(!polyIdMap.contains(p));
48 polyIdMap.emplace_noDuplicateCheck(p, lastId);
49 return lastId;
50}
51unsigned OutputArchiveBase2::generateID2(
52 const void* p, const std::type_info& typeInfo)
53{
54 #ifdef linux
55 assert("Can't serialize ID of object located on the stack" &&
56 !addressOnStack(p));
57 #endif
58 ++lastId;
59 auto key = std::pair(p, std::type_index(typeInfo));
60 assert(!idMap.contains(key));
61 idMap.emplace_noDuplicateCheck(key, lastId);
62 return lastId;
63}
64
65unsigned OutputArchiveBase2::getID1(const void* p)
66{
67 auto* v = lookup(polyIdMap, p);
68 return v ? *v : 0;
69}
70unsigned OutputArchiveBase2::getID2(
71 const void* p, const std::type_info& typeInfo)
72{
73 auto* v = lookup(idMap, std::pair(p, std::type_index(typeInfo)));
74 return v ? *v : 0;
75}
76
77
// Saves the bytes in 'data' as the text content of an element named 'tag'.
// The element gets an "encoding" attribute naming the text encoding used.
// The 'diff' parameter is unused here.
// Throws MSXException when zlib compression fails.
// NOTE(review): the line carrying the function name is absent from this
// listing; only the parameter list is visible.
78template<typename Derived>
80 const char* tag, std::span<const uint8_t> data, bool /*diff*/)
81{
82 string encoding;
83 string tmp;
// The first two branches can never be taken ('if (false)'); they remain in the
// source as alternative encodings that can be switched on by hand.
84 if (false) {
85 // useful for debugging
86 encoding = "hex";
87 tmp = HexDump::encode(data);
88 } else if (false) {
89 encoding = "base64";
90 tmp = Base64::encode(data);
91 } else {
92 encoding = "gz-base64";
93 // TODO check for overflow?
94 auto len = data.size();
// Destination size estimate; compress2() overwrites 'dstLen' with the
// actual compressed size.
95 auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
96 MemBuffer<uint8_t> buf(dstLen);
// Compression level 9.
97 if (compress2(buf.data(), &dstLen,
98 std::bit_cast<const Bytef*>(data.data()),
99 uLong(len), 9)
100 != Z_OK) {
101 throw MSXException("Error while compressing blob.");
102 }
// Only the first 'dstLen' (compressed) bytes are base64-encoded.
103 tmp = Base64::encode(std::span{buf.data(), dstLen});
104 }
105 this->self().beginTag(tag);
106 this->self().attribute("encoding", encoding);
// The encoded text is written through the regular string saver.
107 Saver<string> saver;
108 saver(this->self(), tmp, false);
109 this->self().endTag(tag);
110}
111
114
116
118{
119 auto* v = lookup(idMap, id);
120 return v ? *v : nullptr;
121}
122
123void InputArchiveBase2::addPointer(unsigned id, const void* p)
124{
125 assert(!idMap.contains(id));
126 idMap.emplace_noDuplicateCheck(id, const_cast<void*>(p));
127}
128
129unsigned InputArchiveBase2::getId(const void* ptr) const
130{
131 for (const auto& [id, pt] : idMap) {
132 if (pt == ptr) return id;
133 }
134 return 0;
135}
136
// Loads a blob from the element named 'tag' into 'data'. The element's
// "encoding" attribute selects the decoder: "gz-base64", "hex" or "base64".
// The 'diff' parameter is unused here.
// Throws MSXException when decompression fails or yields a size different
// from data.size(). Throws XMLException when a hex/base64 decode fails or when
// the encoding is not one of the three supported values.
// NOTE(review): the line carrying the function name is absent from this
// listing; only the parameter list is visible.
137template<typename Derived>
139 const char* tag, std::span<uint8_t> data, bool /*diff*/)
140{
141 this->self().beginTag(tag);
142 string encoding;
143 this->self().attribute("encoding", encoding);
144
// NOTE(review): 'tmp' is a view obtained before endTag() but used after it,
// so this relies on endTag() leaving the element's text data intact.
145 string_view tmp = this->self().loadStr();
146 this->self().endTag(tag);
147
148 if (encoding == "gz-base64") {
149 auto [buf, bufSize] = Base64::decode(tmp);
// uncompress() receives the capacity in 'dstLen' and overwrites it with the
// number of bytes actually produced.
150 auto dstLen = uLongf(data.size()); // TODO check for overflow?
151 if ((uncompress(std::bit_cast<Bytef*>(data.data()), &dstLen,
152 std::bit_cast<const Bytef*>(buf.data()), uLong(bufSize))
153 != Z_OK) ||
154 (dstLen != data.size())) {
155 throw MSXException("Error while decompressing blob.");
156 }
157 } else if (encoding == one_of("hex", "base64")) {
// Both decoders write directly into 'data'; a false result is reported below
// as a length mismatch.
158 bool ok = (encoding == "hex")
159 ? HexDump::decode_inplace(tmp, data)
160 : Base64 ::decode_inplace(tmp, data);
161 if (!ok) {
162 throw XMLException(
163 "Length of decoded blob different from "
164 "expected value (", data.size(), ')');
165 }
166 } else {
167 throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
168 }
169}
170
173
175
176void MemOutputArchive::save(std::string_view s)
177{
178 auto size = s.size();
179 auto buf = buffer.allocate(sizeof(size) + size);
180 memcpy(buf.data(), &size, sizeof(size));
181 ranges::copy(s, subspan(buf, sizeof(size)));
182}
183
185{
186 return buffer.release(size);
187}
188
190
191void MemInputArchive::load(std::string& s)
192{
193 size_t length;
194 load(length);
195 s.resize(length);
196 if (length) {
197 get(s.data(), length);
198 }
199}
200
202{
203 size_t length;
204 load(length);
205 const uint8_t* p = buffer.getCurrentPos();
206 buffer.skip(length);
207 return {std::bit_cast<const char*>(p), length};
208}
209
211
212// Very small inputs don't compress well (often the compressed size is even
213// bigger than the input). Compressing them also takes relatively long (because
214// compression often has a relatively large setup time). This value was chosen
215// semi-arbitrarily. It was only made >= 52 so that the (incompressible) RP5C01
216// registers won't be compressed.
217static constexpr size_t SMALL_SIZE = 64;
218void MemOutputArchive::serialize_blob(const char* /*tag*/, std::span<const uint8_t> data,
219 bool diff)
220{
221 // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
222 if (data.size() > SMALL_SIZE) {
223 auto deltaBlockIdx = unsigned(deltaBlocks.size());
224 save(deltaBlockIdx); // see comment below in MemInputArchive
225 deltaBlocks.push_back(diff
226 ? lastDeltaBlocks.createNew(data.data(), data)
227 : lastDeltaBlocks.createNullDiff(data.data(), data));
228 } else {
229 auto buf = buffer.allocate(data.size());
230 ranges::copy(data, buf);
231 }
232}
233
234void MemInputArchive::serialize_blob(const char* /*tag*/, std::span<uint8_t> data,
235 bool /*diff*/)
236{
237 if (data.size() > SMALL_SIZE) {
238 // Usually blobs are saved in the same order as they are loaded
239 // (via the serialize_blob() methods in respectively
240 // MemOutputArchive and MemInputArchive). In that case keeping
241 // track of the deltaBlockIdx in the savestate itself is
242 // redundant (it will simply be an increasing value). However
243 // in rare cases, via the {begin,end,skip)Section() methods, it
244 // is possible that certain blobs are stored in the savestate,
245 // but skipped while loading. That's why we do need the index.
246 unsigned deltaBlockIdx; load(deltaBlockIdx);
247 deltaBlocks[deltaBlockIdx]->apply(data);
248 } else {
249 ranges::copy(std::span{buffer.getCurrentPos(), data.size()}, data);
250 buffer.skip(data.size());
251 }
253
255
// Constructor body: opens 'filename' for writing, wraps a dup()'ed descriptor
// in a gzip stream ("wb9"), writes the XML header and opens the "serial" root
// tag with version, date/time and platform attributes.
// Every failure is reported through error().
// NOTE(review): the constructor's signature line is absent from this listing.
// NOTE(review): error() may run before 'file' is assigned below; that relies
// on 'file' being null-initialized elsewhere - TODO confirm in the header.
257 : filename(filename_)
258 , writer(*this)
259{
260 {
261 auto f = FileOperations::openFile(filename, "wb");
262 if (!f) error();
263 int duped_fd = dup(fileno(f.get()));
264 if (duped_fd == -1) error();
265 file = gzdopen(duped_fd, "wb9");
266 if (!file) {
// gzdopen() failed, so the duplicated descriptor must be closed by hand.
267 ::close(duped_fd);
268 error();
269 }
270 // on scope-exit 'f' is closed, and 'file'
271 // uses the dup()'ed file descriptor.
272 }
273
274 static constexpr std::string_view header =
275 "<?xml version=\"1.0\" ?>\n"
276 "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
277 write(header);
278
279 writer.begin("serial");
280 writer.attribute("openmsx_version", Version::full());
281 writer.attribute("date_time", Date::toString(time(nullptr)));
282 writer.attribute("platform", TARGET_PLATFORM);
283}
284
286{
287 if (!file) return; // already closed
288
289 writer.end("serial");
290
291 if (gzclose(file) != Z_OK) {
292 error();
293 }
294 file = nullptr;
295}
296
298{
299 try {
300 close();
301 } catch (...) {
302 // Eat exception. Explicitly call close() if you want to handle errors.
303 }
304}
305
306void XmlOutputArchive::write(std::span<const char> buf)
307{
308 if ((gzwrite(file, buf.data(), unsigned(buf.size())) == 0) && !buf.empty()) {
309 error();
310 }
311}
312
314{
315 if (gzputc(file, c) == -1) {
316 error();
317 }
318}
319
320void XmlOutputArchive::check(bool condition) const
321{
322 assert(condition); (void)condition;
323}
324
326{
327 if (file) {
328 gzclose(file);
329 file = nullptr;
330 }
331 throw XMLException("could not write \"", filename, '"');
332}
333
335{
336 writer.data(std::string_view(&c, 1));
337}
338void XmlOutputArchive::save(std::string_view str)
339{
340 writer.data(str);
341}
343{
344 writer.data(b ? "true" : "false");
345}
346void XmlOutputArchive::save(unsigned char b)
347{
348 save(unsigned(b));
349}
350void XmlOutputArchive::save(signed char c)
351{
352 save(int(c));
353}
355{
356 save(int(c));
357}
359{
360 saveImpl(i);
361}
363{
364 saveImpl(u);
365}
366void XmlOutputArchive::save(unsigned long long ull)
367{
368 saveImpl(ull);
369}
370
371void XmlOutputArchive::attribute(const char* name, std::string_view str)
372{
373 writer.attribute(name, str);
374}
375void XmlOutputArchive::attribute(const char* name, int i)
376{
377 attributeImpl(name, i);
378}
379void XmlOutputArchive::attribute(const char* name, unsigned u)
380{
381 attributeImpl(name, u);
382}
383
384void XmlOutputArchive::beginTag(const char* tag)
385{
386 writer.begin(tag);
387}
388void XmlOutputArchive::endTag(const char* tag)
389{
390 writer.end(tag);
391}
392
394
395XmlInputArchive::XmlInputArchive(const string& filename)
396{
397 xmlDoc.load(filename, "openmsx-serialize.dtd");
398 const auto* root = xmlDoc.getRoot();
399 elems.emplace_back(root, root->getFirstChild());
400}
401
402string_view XmlInputArchive::loadStr() const
403{
404 if (currentElement()->hasChildren()) {
405 throw XMLException("No child tags expected for primitive type");
406 }
407 return currentElement()->getData();
408}
409void XmlInputArchive::load(string& t) const
410{
411 t = loadStr();
412}
413void XmlInputArchive::loadChar(char& c) const
414{
415 std::string str;
416 load(str);
417 std::istringstream is(str);
418 is >> c;
419}
420void XmlInputArchive::load(bool& b) const
421{
422 string_view s = loadStr();
423 if (s == one_of("true", "1")) {
424 b = true;
425 } else if (s == one_of("false", "0")) {
426 b = false;
427 } else {
428 throw XMLException("Bad value found for boolean: ", s);
429 }
430}
431
432// This function parses a number from a string. It's similar to the generic
433// templatized XmlInputArchive::load() method, but _much_ faster. It does
434// have some limitations though:
435// - it can't handle leading whitespace
436// - it can't handle extra characters at the end of the string
437// - it can only handle one base (only decimal, not octal or hexadecimal)
438// - it doesn't understand a leading '+' sign
439// - it doesn't detect overflow or underflow (The generic implementation sets
440// a 'bad' flag on the stream and clips the result to the min/max allowed
441// value. Though this 'bad' flag was ignored by the openMSX code).
442// This routine is only used to parse strings we've written ourselves (and the
443// savestate/replay XML files are not meant to be manually edited). So the
444// above limitations don't really matter. And we can use the speed gain.
445template<std::integral T> static inline void fastAtoi(string_view str, T& t)
446{
447 t = 0;
448 bool neg = false;
449 size_t i = 0;
450 size_t l = str.size();
451
452 if constexpr (std::numeric_limits<T>::is_signed) {
453 if (l == 0) return;
454 if (str[0] == '-') {
455 neg = true;
456 i = 1;
457 }
458 }
459 for (; i < l; ++i) {
460 unsigned d = str[i] - '0';
461 if (d > 9) [[unlikely]] {
462 throw XMLException("Invalid integer: ", str);
463 }
464 t = 10 * t + d;
465 }
466 if constexpr (std::numeric_limits<T>::is_signed) {
467 if (neg) t = -t;
468 } else {
469 assert(!neg); (void)neg;
470 }
471}
472void XmlInputArchive::load(int& i) const
473{
474 string_view str = loadStr();
475 fastAtoi(str, i);
476}
477void XmlInputArchive::load(unsigned& u) const
479 string_view str = loadStr();
480 try {
481 fastAtoi(str, u);
482 } catch (XMLException&) {
483 // One reason could be that the type of a member was corrected
484 // from 'int' to 'unsigned'. In that case loading an old
485 // savestate (that contains a negative value) might fail. So try
486 // again parsing as an 'int'.
487 int i;
488 fastAtoi(str, i);
489 u = narrow_cast<unsigned>(i);
490 }
491}
492void XmlInputArchive::load(unsigned long long& ull) const
493{
494 string_view str = loadStr();
495 fastAtoi(str, ull);
496}
497void XmlInputArchive::load(unsigned char& b) const
498{
499 unsigned u;
500 load(u);
501 b = narrow_cast<unsigned char>(u);
502}
503void XmlInputArchive::load(signed char& c) const
504{
505 int i;
506 load(i);
507 c = narrow_cast<signed char>(i);
508}
509void XmlInputArchive::load(char& c) const
510{
511 int i;
512 load(i);
513 c = narrow_cast<char>(i);
514}
515
516void XmlInputArchive::beginTag(const char* tag)
517{
518 const auto* child = currentElement()->findChild(tag, elems.back().second);
519 if (!child) {
520 string path;
521 for (auto& [e, _] : elems) {
522 strAppend(path, e->getName(), '/');
523 }
524 throw XMLException("No child tag \"", tag,
525 "\" found at location \"", path, '\"');
526 }
527 elems.emplace_back(child, child->getFirstChild());
528}
529void XmlInputArchive::endTag(const char* tag)
530{
531 const auto& elem = *currentElement();
532 if (elem.getName() != tag) {
533 throw XMLException("End tag \"", elem.getName(),
534 "\" not equal to begin tag \"", tag, "\"");
535 }
536 auto& elem2 = const_cast<XMLElement&>(elem);
537 elem2.clearName(); // mark this elem for later beginTag() calls
538 elems.pop_back();
539}
540
541void XmlInputArchive::attribute(const char* name, string& t) const
542{
543 const auto* attr = currentElement()->findAttribute(name);
544 if (!attr) {
545 throw XMLException("Missing attribute \"", name, "\".");
546 }
547 t = attr->getValue();
548}
549void XmlInputArchive::attribute(const char* name, int& i) const
550{
551 attributeImpl(name, i);
552}
553void XmlInputArchive::attribute(const char* name, unsigned& u) const
554{
555 attributeImpl(name, u);
556}
557bool XmlInputArchive::hasAttribute(const char* name) const
558{
559 return currentElement()->findAttribute(name);
560}
562{
563 return int(currentElement()->numChildren());
564}
565
566} // namespace openmsx
uintptr_t id
TclObject t
bool contains(const K &k) const
Definition hash_map.hh:110
iterator emplace_noDuplicateCheck(Args &&... args)
Definition hash_set.hh:472
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition serialize.hh:248
void * getPointer(unsigned id)
Definition serialize.cc:117
unsigned getId(const void *p) const
Definition serialize.cc:129
void addPointer(unsigned id, const void *p)
Definition serialize.cc:123
void serialize_blob(const char *tag, std::span< uint8_t > data, bool diff=true)
Definition serialize.cc:138
const uint8_t * getCurrentPos() const
Return a pointer to the current position in the buffer.
void skip(size_t len)
Skip the given number of bytes.
std::shared_ptr< DeltaBlock > createNew(const void *id, std::span< const uint8_t > data)
std::shared_ptr< DeltaBlock > createNullDiff(const void *id, std::span< const uint8_t > data)
This class manages the lifetime of a block of memory.
Definition MemBuffer.hh:29
const T * data() const
Returns pointer to the start of the memory buffer.
Definition MemBuffer.hh:81
void serialize_blob(const char *tag, std::span< uint8_t > data, bool diff=true)
Definition serialize.cc:234
std::string_view loadStr()
Definition serialize.cc:201
void save(const T &t)
Definition serialize.hh:674
void serialize_blob(const char *tag, std::span< const uint8_t > data, bool diff=true)
Definition serialize.cc:218
MemBuffer< uint8_t > releaseBuffer(size_t &size)
Definition serialize.cc:184
void serialize_blob(const char *tag, std::span< const uint8_t > data, bool diff=true)
Definition serialize.cc:79
std::span< uint8_t > allocate(size_t len)
Reserve space to insert the given number of bytes.
MemBuffer< uint8_t > release(size_t &size)
Release ownership of the buffer.
unsigned size() const
Definition TclObject.hh:177
static std::string full()
Definition Version.cc:8
const XMLElement * getRoot() const
void load(const std::string &filename, std::string_view systemID)
const XMLAttribute * findAttribute(std::string_view attrName) const
Definition XMLElement.cc:96
const XMLElement * findChild(std::string_view childName) const
Definition XMLElement.cc:21
std::string_view getData() const
void attribute(const char *name, T &t)
Definition serialize.hh:996
bool hasAttribute(const char *name) const
Definition serialize.cc:557
void load(bool &b) const
Definition serialize.cc:420
void endTag(const char *tag)
Definition serialize.cc:529
XmlInputArchive(const std::string &filename)
Definition serialize.cc:395
const XMLElement * currentElement() const
Definition serialize.hh:977
std::string_view loadStr() const
Definition serialize.cc:402
void attributeImpl(const char *name, T &t) const
Definition serialize.hh:989
void loadChar(char &c) const
Definition serialize.cc:413
void beginTag(const char *tag)
Definition serialize.cc:516
void saveImpl(const T &t)
Definition serialize.hh:864
void check(bool condition) const
Definition serialize.cc:320
void write(std::span< const char > buf)
Definition serialize.cc:306
void save(const T &t)
Definition serialize.hh:870
XmlOutputArchive(zstring_view filename)
Definition serialize.cc:256
void attributeImpl(const char *name, const T &t)
Definition serialize.hh:908
void endTag(const char *tag)
Definition serialize.cc:388
void beginTag(const char *tag)
Definition serialize.cc:384
void attribute(const char *name, const T &t)
Definition serialize.hh:912
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition hash_map.hh:118
bool decode_inplace(std::string_view input, std::span< uint8_t > output)
Definition HexDump.cc:76
std::string toString(time_t time)
Definition Date.cc:152
FILE_t openFile(zstring_view filename, zstring_view mode)
Call fopen() in a platform-independent manner.
This file implemented 3 utility functions:
Definition Autofire.cc:11
auto copy(InputRange &&range, OutputIter out)
Definition ranges.hh:250
constexpr auto subspan(Range &&range, size_t offset, size_t count=std::dynamic_extent)
Definition ranges.hh:471
void strAppend(std::string &result, Ts &&...ts)
Definition strCat.hh:752