openMSX
serialize.cc
Go to the documentation of this file.
1#include "serialize.hh"
2#include "Base64.hh"
3#include "HexDump.hh"
4#include "XMLElement.hh"
5#include "ConfigException.hh"
6#include "XMLException.hh"
7#include "DeltaBlock.hh"
8#include "MemBuffer.hh"
9#include "FileOperations.hh"
10#include "StringOp.hh"
11#include "Version.hh"
12#include "Date.hh"
13#include "narrow.hh"
14#include "one_of.hh"
15#include "stl.hh"
16#include "build-info.hh"
17#include "cstdiop.hh" // for dup()
18#include <cstdint>
19#include <cstring>
20#include <iostream>
21#include <limits>
22
23using std::string;
24using std::string_view;
25
26namespace openmsx {
27
28template<typename Derived>
29void ArchiveBase<Derived>::attribute(const char* name, const char* value)
30{
31 string valueStr(value);
32 self().attribute(name, valueStr);
33}
36
38
39unsigned OutputArchiveBase2::generateID1(const void* p)
40{
41 #ifdef linux
42 assert("Can't serialize ID of object located on the stack" &&
43 !addressOnStack(p));
44 #endif
45 ++lastId;
46 assert(!polyIdMap.contains(p));
47 polyIdMap.emplace_noDuplicateCheck(p, lastId);
48 return lastId;
49}
50unsigned OutputArchiveBase2::generateID2(
51 const void* p, const std::type_info& typeInfo)
52{
53 #ifdef linux
54 assert("Can't serialize ID of object located on the stack" &&
55 !addressOnStack(p));
56 #endif
57 ++lastId;
58 auto key = std::pair(p, std::type_index(typeInfo));
59 assert(!idMap.contains(key));
60 idMap.emplace_noDuplicateCheck(key, lastId);
61 return lastId;
62}
63
64unsigned OutputArchiveBase2::getID1(const void* p)
65{
66 auto* v = lookup(polyIdMap, p);
67 return v ? *v : 0;
68}
69unsigned OutputArchiveBase2::getID2(
70 const void* p, const std::type_info& typeInfo)
71{
72 auto* v = lookup(idMap, std::pair(p, std::type_index(typeInfo)));
73 return v ? *v : 0;
74}
75
76
77template<typename Derived>
79 const char* tag, std::span<const uint8_t> data, bool /*diff*/)
80{
81 string encoding;
82 string tmp;
83 if (false) {
84 // useful for debugging
85 encoding = "hex";
86 tmp = HexDump::encode(data);
87 } else if (false) {
88 encoding = "base64";
89 tmp = Base64::encode(data);
90 } else {
91 encoding = "gz-base64";
92 // TODO check for overflow?
93 auto len = data.size();
94 auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
95 MemBuffer<uint8_t> buf(dstLen);
96 if (compress2(buf.data(), &dstLen,
97 reinterpret_cast<const Bytef*>(data.data()),
98 uLong(len), 9)
99 != Z_OK) {
100 throw MSXException("Error while compressing blob.");
101 }
102 tmp = Base64::encode(std::span{buf.data(), dstLen});
103 }
104 this->self().beginTag(tag);
105 this->self().attribute("encoding", encoding);
106 Saver<string> saver;
107 saver(this->self(), tmp, false);
108 this->self().endTag(tag);
109}
110
113
115
117{
118 auto* v = lookup(idMap, id);
119 return v ? *v : nullptr;
120}
121
122void InputArchiveBase2::addPointer(unsigned id, const void* p)
123{
124 assert(!idMap.contains(id));
125 idMap.emplace_noDuplicateCheck(id, const_cast<void*>(p));
126}
127
128unsigned InputArchiveBase2::getId(const void* ptr) const
129{
130 for (const auto& [id, pt] : idMap) {
131 if (pt == ptr) return id;
132 }
133 return 0;
134}
135
136template<typename Derived>
138 const char* tag, std::span<uint8_t> data, bool /*diff*/)
139{
140 this->self().beginTag(tag);
141 string encoding;
142 this->self().attribute("encoding", encoding);
143
144 string_view tmp = this->self().loadStr();
145 this->self().endTag(tag);
146
147 if (encoding == "gz-base64") {
148 auto [buf, bufSize] = Base64::decode(tmp);
149 auto dstLen = uLongf(data.size()); // TODO check for overflow?
150 if ((uncompress(reinterpret_cast<Bytef*>(data.data()), &dstLen,
151 reinterpret_cast<const Bytef*>(buf.data()), uLong(bufSize))
152 != Z_OK) ||
153 (dstLen != data.size())) {
154 throw MSXException("Error while decompressing blob.");
155 }
156 } else if (encoding == one_of("hex", "base64")) {
157 bool ok = (encoding == "hex")
158 ? HexDump::decode_inplace(tmp, data)
159 : Base64 ::decode_inplace(tmp, data);
160 if (!ok) {
161 throw XMLException(
162 "Length of decoded blob different from "
163 "expected value (", data.size(), ')');
164 }
165 } else {
166 throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
167 }
168}
169
172
174
175void MemOutputArchive::save(std::string_view s)
176{
177 auto size = s.size();
178 auto buf = buffer.allocate(sizeof(size) + size);
179 memcpy(buf.data(), &size, sizeof(size));
180 ranges::copy(s, subspan(buf, sizeof(size)));
181}
182
184{
185 return buffer.release(size);
186}
187
189
190void MemInputArchive::load(std::string& s)
191{
192 size_t length;
193 load(length);
194 s.resize(length);
195 if (length) {
196 get(s.data(), length);
197 }
198}
199
201{
202 size_t length;
203 load(length);
204 const uint8_t* p = buffer.getCurrentPos();
205 buffer.skip(length);
206 return {reinterpret_cast<const char*>(p), length};
207}
208
210
// Very small inputs don't compress well (often the compressed size is even
// bigger than the input). Compressing them also takes relatively long
// (because compression often has a relatively large setup time). The value
// below was chosen semi-arbitrarily; it was only made >= 52 so that the
// (incompressible) RP5C01 registers won't be compressed.
static constexpr size_t SMALL_SIZE = 64;
217void MemOutputArchive::serialize_blob(const char* /*tag*/, std::span<const uint8_t> data,
218 bool diff)
219{
220 // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
221 if (data.size() > SMALL_SIZE) {
222 auto deltaBlockIdx = unsigned(deltaBlocks.size());
223 save(deltaBlockIdx); // see comment below in MemInputArchive
224 deltaBlocks.push_back(diff
225 ? lastDeltaBlocks.createNew(data.data(), data)
226 : lastDeltaBlocks.createNullDiff(data.data(), data));
227 } else {
228 auto buf = buffer.allocate(data.size());
229 ranges::copy(data, buf);
230 }
231}
232
233void MemInputArchive::serialize_blob(const char* /*tag*/, std::span<uint8_t> data,
234 bool /*diff*/)
235{
236 if (data.size() > SMALL_SIZE) {
237 // Usually blobs are saved in the same order as they are loaded
238 // (via the serialize_blob() methods in respectively
239 // MemOutputArchive and MemInputArchive). In that case keeping
240 // track of the deltaBlockIdx in the savestate itself is
241 // redundant (it will simply be an increasing value). However
242 // in rare cases, via the {begin,end,skip)Section() methods, it
243 // is possible that certain blobs are stored in the savestate,
244 // but skipped while loading. That's why we do need the index.
245 unsigned deltaBlockIdx; load(deltaBlockIdx);
246 deltaBlocks[deltaBlockIdx]->apply(data);
247 } else {
248 ranges::copy(std::span{buffer.getCurrentPos(), data.size()}, data);
249 buffer.skip(data.size());
250 }
251}
254
256 : filename(filename_)
257 , writer(*this)
258{
259 {
260 auto f = FileOperations::openFile(filename, "wb");
261 if (!f) error();
262 int duped_fd = dup(fileno(f.get()));
263 if (duped_fd == -1) error();
264 file = gzdopen(duped_fd, "wb9");
265 if (!file) {
266 ::close(duped_fd);
267 error();
268 }
269 // on scope-exit 'f' is closed, and 'file'
270 // uses the dup()'ed file descriptor.
271 }
272
273 static constexpr std::string_view header =
274 "<?xml version=\"1.0\" ?>\n"
275 "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
276 write(header);
277
278 writer.begin("serial");
279 writer.attribute("openmsx_version", Version::full());
280 writer.attribute("date_time", Date::toString(time(nullptr)));
281 writer.attribute("platform", TARGET_PLATFORM);
282}
283
285{
286 if (!file) return; // already closed
287
288 writer.end("serial");
289
290 if (gzclose(file) != Z_OK) {
291 error();
292 }
293 file = nullptr;
294}
295
297{
298 try {
299 close();
300 } catch (...) {
301 // Eat exception. Explicitly call close() if you want to handle errors.
302 }
303}
304
305void XmlOutputArchive::write(std::span<const char> buf)
306{
307 if ((gzwrite(file, buf.data(), unsigned(buf.size())) == 0) && !buf.empty()) {
308 error();
309 }
310}
311
313{
314 if (gzputc(file, c) == -1) {
315 error();
316 }
317}
318
319void XmlOutputArchive::check(bool condition) const
320{
321 assert(condition); (void)condition;
322}
323
325{
326 if (file) {
327 gzclose(file);
328 file = nullptr;
329 }
330 throw XMLException("could not write \"", filename, '"');
331}
332
334{
335 writer.data(std::string_view(&c, 1));
336}
337void XmlOutputArchive::save(std::string_view str)
338{
339 writer.data(str);
340}
342{
343 writer.data(b ? "true" : "false");
344}
345void XmlOutputArchive::save(unsigned char b)
346{
347 save(unsigned(b));
348}
349void XmlOutputArchive::save(signed char c)
350{
351 save(int(c));
352}
354{
355 save(int(c));
356}
358{
359 saveImpl(i);
360}
362{
363 saveImpl(u);
364}
365void XmlOutputArchive::save(unsigned long long ull)
366{
367 saveImpl(ull);
368}
369
370void XmlOutputArchive::attribute(const char* name, std::string_view str)
371{
372 writer.attribute(name, str);
373}
374void XmlOutputArchive::attribute(const char* name, int i)
375{
376 attributeImpl(name, i);
377}
378void XmlOutputArchive::attribute(const char* name, unsigned u)
379{
380 attributeImpl(name, u);
381}
382
383void XmlOutputArchive::beginTag(const char* tag)
384{
385 writer.begin(tag);
386}
387void XmlOutputArchive::endTag(const char* tag)
388{
389 writer.end(tag);
390}
391
393
394XmlInputArchive::XmlInputArchive(const string& filename)
395{
396 xmlDoc.load(filename, "openmsx-serialize.dtd");
397 const auto* root = xmlDoc.getRoot();
398 elems.emplace_back(root, root->getFirstChild());
399}
400
402{
403 if (currentElement()->hasChildren()) {
404 throw XMLException("No child tags expected for primitive type");
405 }
406 return currentElement()->getData();
407}
409{
410 t = loadStr();
411}
413{
414 std::string str;
415 load(str);
416 std::istringstream is(str);
417 is >> c;
418}
420{
421 string_view s = loadStr();
422 if (s == one_of("true", "1")) {
423 b = true;
424 } else if (s == one_of("false", "0")) {
425 b = false;
426 } else {
427 throw XMLException("Bad value found for boolean: ", s);
428 }
429}
430
431// This function parses a number from a string. It's similar to the generic
432// templatized XmlInputArchive::load() method, but _much_ faster. It does
433// have some limitations though:
434// - it can't handle leading whitespace
435// - it can't handle extra characters at the end of the string
436// - it can only handle one base (only decimal, not octal or hexadecimal)
437// - it doesn't understand a leading '+' sign
438// - it doesn't detect overflow or underflow (The generic implementation sets
439// a 'bad' flag on the stream and clips the result to the min/max allowed
440// value. Though this 'bad' flag was ignored by the openMSX code).
441// This routine is only used to parse strings we've written ourselves (and the
442// savestate/replay XML files are not meant to be manually edited). So the
443// above limitations don't really matter. And we can use the speed gain.
444template<std::integral T> static inline void fastAtoi(string_view str, T& t)
445{
446 t = 0;
447 bool neg = false;
448 size_t i = 0;
449 size_t l = str.size();
450
451 if constexpr (std::numeric_limits<T>::is_signed) {
452 if (l == 0) return;
453 if (str[0] == '-') {
454 neg = true;
455 i = 1;
456 }
457 }
458 for (; i < l; ++i) {
459 unsigned d = str[i] - '0';
460 if (d > 9) [[unlikely]] {
461 throw XMLException("Invalid integer: ", str);
462 }
463 t = 10 * t + d;
464 }
465 if constexpr (std::numeric_limits<T>::is_signed) {
466 if (neg) t = -t;
467 } else {
468 assert(!neg); (void)neg;
469 }
470}
472{
473 string_view str = loadStr();
474 fastAtoi(str, i);
475}
476void XmlInputArchive::load(unsigned& u)
477{
478 string_view str = loadStr();
479 try {
480 fastAtoi(str, u);
481 } catch (XMLException&) {
482 // One reason could be that the type of a member was corrected
483 // from 'int' to 'unsigned'. In that case loading an old
484 // savestate (that contains a negative value) might fail. So try
485 // again parsing as an 'int'.
486 int i;
487 fastAtoi(str, i);
488 u = narrow_cast<unsigned>(i);
489 }
490}
491void XmlInputArchive::load(unsigned long long& ull)
492{
493 string_view str = loadStr();
494 fastAtoi(str, ull);
495}
496void XmlInputArchive::load(unsigned char& b)
497{
498 unsigned u;
499 load(u);
500 b = narrow_cast<unsigned char>(u);
501}
502void XmlInputArchive::load(signed char& c)
503{
504 int i;
505 load(i);
506 c = narrow_cast<signed char>(i);
507}
509{
510 int i;
511 load(i);
512 c = narrow_cast<char>(i);
513}
514
515void XmlInputArchive::beginTag(const char* tag)
516{
517 const auto* child = currentElement()->findChild(tag, elems.back().second);
518 if (!child) {
519 string path;
520 for (auto& e : elems) {
521 strAppend(path, e.first->getName(), '/');
522 }
523 throw XMLException("No child tag \"", tag,
524 "\" found at location \"", path, '\"');
525 }
526 elems.emplace_back(child, child->getFirstChild());
527}
528void XmlInputArchive::endTag(const char* tag)
529{
530 const auto& elem = *currentElement();
531 if (elem.getName() != tag) {
532 throw XMLException("End tag \"", elem.getName(),
533 "\" not equal to begin tag \"", tag, "\"");
534 }
535 auto& elem2 = const_cast<XMLElement&>(elem);
536 elem2.clearName(); // mark this elem for later beginTag() calls
537 elems.pop_back();
538}
539
540void XmlInputArchive::attribute(const char* name, string& t)
541{
542 const auto* attr = currentElement()->findAttribute(name);
543 if (!attr) {
544 throw XMLException("Missing attribute \"", name, "\".");
545 }
546 t = attr->getValue();
547}
548void XmlInputArchive::attribute(const char* name, int& i)
549{
550 attributeImpl(name, i);
551}
552void XmlInputArchive::attribute(const char* name, unsigned& u)
553{
554 attributeImpl(name, u);
555}
556bool XmlInputArchive::hasAttribute(const char* name) const
557{
558 return currentElement()->findAttribute(name);
559}
561{
562 return int(currentElement()->numChildren());
563}
564
565} // namespace openmsx
uintptr_t id
Definition: Interpreter.cc:27
TclObject t
bool contains(const K &k) const
Definition: hash_map.hh:110
iterator emplace_noDuplicateCheck(Args &&... args)
Definition: hash_set.hh:472
Definition: one_of.hh:7
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition: serialize.hh:248
void * getPointer(unsigned id)
Definition: serialize.cc:116
unsigned getId(const void *p) const
Definition: serialize.cc:128
void addPointer(unsigned id, const void *p)
Definition: serialize.cc:122
void serialize_blob(const char *tag, std::span< uint8_t > data, bool diff=true)
Definition: serialize.cc:137
const uint8_t * getCurrentPos() const
Return a pointer to the current position in the buffer.
void skip(size_t len)
Skip the given number of bytes.
std::shared_ptr< DeltaBlock > createNew(const void *id, std::span< const uint8_t > data)
Definition: DeltaBlock.cc:358
std::shared_ptr< DeltaBlock > createNullDiff(const void *id, std::span< const uint8_t > data)
Definition: DeltaBlock.cc:395
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
void serialize_blob(const char *tag, std::span< uint8_t > data, bool diff=true)
Definition: serialize.cc:233
std::string_view loadStr()
Definition: serialize.cc:200
void save(const T &t)
Definition: serialize.hh:674
void serialize_blob(const char *tag, std::span< const uint8_t > data, bool diff=true)
Definition: serialize.cc:217
MemBuffer< uint8_t > releaseBuffer(size_t &size)
Definition: serialize.cc:183
void serialize_blob(const char *tag, std::span< const uint8_t > data, bool diff=true)
Definition: serialize.cc:78
std::span< uint8_t > allocate(size_t len)
Reserve space to insert the given number of bytes.
MemBuffer< uint8_t > release(size_t &size)
Release ownership of the buffer.
unsigned size() const
Definition: TclObject.hh:167
static std::string full()
Definition: Version.cc:8
const XMLElement * getRoot() const
Definition: XMLElement.hh:269
void load(const std::string &filename, std::string_view systemID)
Definition: XMLElement.cc:319
const XMLAttribute * findAttribute(std::string_view attrName) const
Definition: XMLElement.cc:95
const XMLElement * findChild(std::string_view childName) const
Definition: XMLElement.cc:19
std::string_view getData() const
Definition: XMLElement.hh:173
void attribute(const char *name, T &t)
Definition: serialize.hh:996
bool hasAttribute(const char *name) const
Definition: serialize.cc:556
std::string_view loadStr()
Definition: serialize.cc:401
void endTag(const char *tag)
Definition: serialize.cc:528
int countChildren() const
Definition: serialize.cc:560
XmlInputArchive(const std::string &filename)
Definition: serialize.cc:394
const XMLElement * currentElement() const
Definition: serialize.hh:977
void attributeImpl(const char *name, T &t)
Definition: serialize.hh:989
void beginTag(const char *tag)
Definition: serialize.cc:515
void loadChar(char &c)
Definition: serialize.cc:412
void saveImpl(const T &t)
Definition: serialize.hh:864
void check(bool condition) const
Definition: serialize.cc:319
void write(std::span< const char > buf)
Definition: serialize.cc:305
void save(const T &t)
Definition: serialize.hh:870
XmlOutputArchive(zstring_view filename)
Definition: serialize.cc:255
void attributeImpl(const char *name, const T &t)
Definition: serialize.hh:908
void endTag(const char *tag)
Definition: serialize.cc:387
void beginTag(const char *tag)
Definition: serialize.cc:383
void attribute(const char *name, const T &t)
Definition: serialize.hh:912
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
Definition: zstring_view.hh:22
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition: hash_map.hh:118
bool decode_inplace(std::string_view input, std::span< uint8_t > output)
Definition: Base64.cc:125
bool decode_inplace(std::string_view input, std::span< uint8_t > output)
Definition: HexDump.cc:73
constexpr double e
Definition: Math.hh:20
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:340
std::string toString(time_t time)
Definition: Date.cc:152
FILE_t openFile(zstring_view filename, zstring_view mode)
Call fopen() in a platform-independent manner.
This file implemented 3 utility functions:
Definition: Autofire.cc:9
auto copy(InputRange &&range, OutputIter out)
Definition: ranges.hh:232
size_t size(std::string_view utf8)
constexpr auto subspan(Range &&range, size_t offset, size_t count=std::dynamic_extent)
Definition: ranges.hh:446
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:620