openMSX
serialize.cc
Go to the documentation of this file.
1 #include "serialize.hh"
2 #include "Base64.hh"
3 #include "HexDump.hh"
4 #include "XMLElement.hh"
5 #include "ConfigException.hh"
6 #include "XMLException.hh"
7 #include "DeltaBlock.hh"
8 #include "MemBuffer.hh"
9 #include "FileOperations.hh"
10 #include "StringOp.hh"
11 #include "Version.hh"
12 #include "Date.hh"
13 #include "one_of.hh"
14 #include "stl.hh"
15 #include "build-info.hh"
16 #include "cstdiop.hh" // for dup()
17 #include <cstring>
18 #include <iostream>
19 #include <limits>
20 
21 using std::string;
22 using std::string_view;
23 
24 namespace openmsx {
25 
26 template<typename Derived>
27 void ArchiveBase<Derived>::attribute(const char* name, const char* value)
28 {
29  string valueStr(value);
30  self().attribute(name, valueStr);
31 }
32 template class ArchiveBase<MemOutputArchive>;
33 template class ArchiveBase<XmlOutputArchive>;
34 
36 
37 unsigned OutputArchiveBase2::generateID1(const void* p)
38 {
39  #ifdef linux
40  assert("Can't serialize ID of object located on the stack" &&
41  !addressOnStack(p));
42  #endif
43  ++lastId;
44  assert(!polyIdMap.contains(p));
45  polyIdMap.emplace_noDuplicateCheck(p, lastId);
46  return lastId;
47 }
48 unsigned OutputArchiveBase2::generateID2(
49  const void* p, const std::type_info& typeInfo)
50 {
51  #ifdef linux
52  assert("Can't serialize ID of object located on the stack" &&
53  !addressOnStack(p));
54  #endif
55  ++lastId;
56  auto key = std::pair(p, std::type_index(typeInfo));
57  assert(!idMap.contains(key));
58  idMap.emplace_noDuplicateCheck(key, lastId);
59  return lastId;
60 }
61 
62 unsigned OutputArchiveBase2::getID1(const void* p)
63 {
64  auto* v = lookup(polyIdMap, p);
65  return v ? *v : 0;
66 }
67 unsigned OutputArchiveBase2::getID2(
68  const void* p, const std::type_info& typeInfo)
69 {
70  auto* v = lookup(idMap, std::pair(p, std::type_index(typeInfo)));
71  return v ? *v : 0;
72 }
73 
74 
75 template<typename Derived>
77  const char* tag, const void* data_, size_t len, bool /*diff*/)
78 {
79  const auto* data = static_cast<const uint8_t*>(data_);
80 
81  string encoding;
82  string tmp;
83  if (false) {
84  // useful for debugging
85  encoding = "hex";
86  tmp = HexDump::encode(data, len);
87  } else if (false) {
88  encoding = "base64";
89  tmp = Base64::encode(data, len);
90  } else {
91  encoding = "gz-base64";
92  // TODO check for overflow?
93  auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
94  MemBuffer<uint8_t> buf(dstLen);
95  if (compress2(buf.data(), &dstLen,
96  reinterpret_cast<const Bytef*>(data),
97  uLong(len), 9)
98  != Z_OK) {
99  throw MSXException("Error while compressing blob.");
100  }
101  tmp = Base64::encode(buf.data(), dstLen);
102  }
103  this->self().beginTag(tag);
104  this->self().attribute("encoding", encoding);
105  Saver<string> saver;
106  saver(this->self(), tmp, false);
107  this->self().endTag(tag);
108 }
109 
112 
114 
116 {
117  auto* v = lookup(idMap, id);
118  return v ? *v : nullptr;
119 }
120 
121 void InputArchiveBase2::addPointer(unsigned id, const void* p)
122 {
123  assert(!idMap.contains(id));
124  idMap.emplace_noDuplicateCheck(id, const_cast<void*>(p));
125 }
126 
127 unsigned InputArchiveBase2::getId(const void* ptr) const
128 {
129  for (const auto& [id, pt] : idMap) {
130  if (pt == ptr) return id;
131  }
132  return 0;
133 }
134 
135 template<typename Derived>
137  const char* tag, void* data, size_t len, bool /*diff*/)
138 {
139  this->self().beginTag(tag);
140  string encoding;
141  this->self().attribute("encoding", encoding);
142 
143  string_view tmp = this->self().loadStr();
144  this->self().endTag(tag);
145 
146  if (encoding == "gz-base64") {
147  auto [buf, bufSize] = Base64::decode(tmp);
148  auto dstLen = uLongf(len); // TODO check for overflow?
149  if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
150  reinterpret_cast<const Bytef*>(buf.data()), uLong(bufSize))
151  != Z_OK) ||
152  (dstLen != len)) {
153  throw MSXException("Error while decompressing blob.");
154  }
155  } else if (encoding == one_of("hex", "base64")) {
156  bool ok = (encoding == "hex")
157  ? HexDump::decode_inplace(tmp, static_cast<uint8_t*>(data), len)
158  : Base64 ::decode_inplace(tmp, static_cast<uint8_t*>(data), len);
159  if (!ok) {
160  throw XMLException(
161  "Length of decoded blob different from "
162  "expected value (", len, ')');
163  }
164  } else {
165  throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
166  }
167 }
168 
169 template class InputArchiveBase<MemInputArchive>;
170 template class InputArchiveBase<XmlInputArchive>;
171 
173 
174 void MemOutputArchive::save(std::string_view s)
175 {
176  auto size = s.size();
177  uint8_t* buf = buffer.allocate(sizeof(size) + size);
178  memcpy(buf, &size, sizeof(size));
179  if (size) {
180  memcpy(buf + sizeof(size), s.data(), size);
181  }
182 }
183 
185 {
186  return buffer.release(size);
187 }
188 
190 
191 void MemInputArchive::load(std::string& s)
192 {
193  size_t length;
194  load(length);
195  s.resize(length);
196  if (length) {
197  get(&s[0], length);
198  }
199 }
200 
202 {
203  size_t length;
204  load(length);
205  const uint8_t* p = buffer.getCurrentPos();
206  buffer.skip(length);
207  return string_view(reinterpret_cast<const char*>(p), length);
208 }
209 
211 
212 // Too small inputs don't compress very well (often the compressed size is even
213 // bigger than the input). It also takes a relatively long time (because often
214 // compression has a relatively large setup time). I choose this value
215 // semi-arbitrary. I only made it >= 52 so that the (incompressible) RP5C01
216 // registers won't be compressed.
217 constexpr size_t SMALL_SIZE = 64;
218 void MemOutputArchive::serialize_blob(const char* /*tag*/, const void* data,
219  size_t len, bool diff)
220 {
221  // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
222  if (len > SMALL_SIZE) {
223  auto deltaBlockIdx = unsigned(deltaBlocks.size());
224  save(deltaBlockIdx); // see comment below in MemInputArchive
225  deltaBlocks.push_back(diff
226  ? lastDeltaBlocks.createNew(
227  data, static_cast<const uint8_t*>(data), len)
228  : lastDeltaBlocks.createNullDiff(
229  data, static_cast<const uint8_t*>(data), len));
230  } else {
231  uint8_t* buf = buffer.allocate(len);
232  memcpy(buf, data, len);
233  }
234 
235 }
236 
237 void MemInputArchive::serialize_blob(const char* /*tag*/, void* data,
238  size_t len, bool /*diff*/)
239 {
240  if (len > SMALL_SIZE) {
241  // Usually blobs are saved in the same order as they are loaded
242  // (via the serialize_blob() methods in respectively
243  // MemOutputArchive and MemInputArchive). In that case keeping
244  // track of the deltaBlockIdx in the savestate itself is
245  // redundant (it will simply be an increasing value). However
246  // in rare cases, via the {begin,end,skip)Section() methods, it
247  // is possible that certain blobs are stored in the savestate,
248  // but skipped while loading. That's why we do need the index.
249  unsigned deltaBlockIdx; load(deltaBlockIdx);
250  deltaBlocks[deltaBlockIdx]->apply(static_cast<uint8_t*>(data), len);
251  } else {
252  memcpy(data, buffer.getCurrentPos(), len);
253  buffer.skip(len);
254  }
255 }
256 
258 
260  : filename(filename_)
261  , writer(*this)
262 {
263  {
264  auto f = FileOperations::openFile(filename, "wb");
265  if (!f) error();
266  int duped_fd = dup(fileno(f.get()));
267  if (duped_fd == -1) error();
268  file = gzdopen(duped_fd, "wb9");
269  if (!file) {
270  ::close(duped_fd);
271  error();
272  }
273  // on scope-exit 'f' is closed, and 'file'
274  // uses the dup()'ed file descriptor.
275  }
276 
277  static constexpr std::string_view header =
278  "<?xml version=\"1.0\" ?>\n"
279  "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
280  write(header.data(), header.size());
281 
282  writer.begin("serial");
283  writer.attribute("openmsx_version", Version::full());
284  writer.attribute("date_time", Date::toString(time(nullptr)));
285  writer.attribute("platform", TARGET_PLATFORM);
286 }
287 
289 {
290  if (!file) return; // already closed
291 
292  writer.end("serial");
293 
294  if (gzclose(file) != Z_OK) {
295  error();
296  }
297  file = nullptr;
298 }
299 
301 {
302  try {
303  close();
304  } catch (...) {
305  // Eat exception. Explicitly call close() if you want to handle errors.
306  }
307 }
308 
309 void XmlOutputArchive::write(const char* buf, size_t len)
310 {
311  if ((gzwrite(file, buf, unsigned(len)) == 0) && (len != 0)) {
312  error();
313  }
314 }
315 
317 {
318  if (gzputc(file, c) == -1) {
319  error();
320  }
321 }
322 
323 void XmlOutputArchive::check(bool condition) const
324 {
325  assert(condition); (void)condition;
326 }
327 
329 {
330  if (file) {
331  gzclose(file);
332  file = nullptr;
333  }
334  throw XMLException("could not write \"", filename, '"');
335 }
336 
338 {
339  writer.data(std::string_view(&c, 1));
340 }
341 void XmlOutputArchive::save(std::string_view str)
342 {
343  writer.data(str);
344 }
346 {
347  writer.data(b ? "true" : "false");
348 }
349 void XmlOutputArchive::save(unsigned char b)
350 {
351  save(unsigned(b));
352 }
353 void XmlOutputArchive::save(signed char c)
354 {
355  save(int(c));
356 }
358 {
359  save(int(c));
360 }
362 {
363  saveImpl(i);
364 }
365 void XmlOutputArchive::save(unsigned u)
366 {
367  saveImpl(u);
368 }
369 void XmlOutputArchive::save(unsigned long long ull)
370 {
371  saveImpl(ull);
372 }
373 
374 void XmlOutputArchive::attribute(const char* name, std::string_view str)
375 {
376  writer.attribute(name, str);
377 }
378 void XmlOutputArchive::attribute(const char* name, int i)
379 {
380  attributeImpl(name, i);
381 }
382 void XmlOutputArchive::attribute(const char* name, unsigned u)
383 {
384  attributeImpl(name, u);
385 }
386 
387 void XmlOutputArchive::beginTag(const char* tag)
388 {
389  writer.begin(tag);
390 }
391 void XmlOutputArchive::endTag(const char* tag)
392 {
393  writer.end(tag);
394 }
395 
397 
399 {
400  xmlDoc.load(filename, "openmsx-serialize.dtd");
401  const auto* root = xmlDoc.getRoot();
402  elems.emplace_back(root, root->getFirstChild());
403 }
404 
406 {
407  if (currentElement()->hasChildren()) {
408  throw XMLException("No child tags expected for primitive type");
409  }
410  return currentElement()->getData();
411 }
413 {
414  t = loadStr();
415 }
417 {
418  std::string str;
419  load(str);
420  std::istringstream is(str);
421  is >> c;
422 }
424 {
425  string_view s = loadStr();
426  if (s == one_of("true", "1")) {
427  b = true;
428  } else if (s == one_of("false", "0")) {
429  b = false;
430  } else {
431  throw XMLException("Bad value found for boolean: ", s);
432  }
433 }
434 
435 // This function parses a number from a string. It's similar to the generic
436 // templatized XmlInputArchive::load() method, but _much_ faster. It does
437 // have some limitations though:
438 // - it can't handle leading whitespace
439 // - it can't handle extra characters at the end of the string
440 // - it can only handle one base (only decimal, not octal or hexadecimal)
441 // - it doesn't understand a leading '+' sign
442 // - it doesn't detect overflow or underflow (The generic implementation sets
443 // a 'bad' flag on the stream and clips the result to the min/max allowed
444 // value. Though this 'bad' flag was ignored by the openMSX code).
445 // This routine is only used to parse strings we've written ourselves (and the
446 // savestate/replay XML files are not meant to be manually edited). So the
447 // above limitations don't really matter. And we can use the speed gain.
// Helper functor used by fastAtoi(): negates a value on request, but only
// for signed types. Selected at compile time via the bool template argument.
template<bool IS_SIGNED> struct ConditionalNegate;

// Signed flavour: actually performs the negation when asked to.
template<> struct ConditionalNegate<true> {
	template<typename T> void operator()(bool negate, T& t) {
		if (!negate) return;
		t = -t; // ok to negate a signed type
	}
};

// Unsigned flavour: never modifies the value, a negate request is a bug.
template<> struct ConditionalNegate<false> {
	template<typename T> void operator()(bool negate, T& /*t*/) {
		assert(!negate); // can't negate unsigned type
		(void)negate;
	}
};
459 template<typename T> static inline void fastAtoi(string_view str, T& t)
460 {
461  t = 0;
462  bool neg = false;
463  size_t i = 0;
464  size_t l = str.size();
465 
466  constexpr bool IS_SIGNED = std::numeric_limits<T>::is_signed;
467  if constexpr (IS_SIGNED) {
468  if (l == 0) return;
469  if (str[0] == '-') {
470  neg = true;
471  i = 1;
472  }
473  }
474  for (; i < l; ++i) {
475  unsigned d = str[i] - '0';
476  if (unlikely(d > 9)) {
477  throw XMLException("Invalid integer: ", str);
478  }
479  t = 10 * t + d;
480  }
481  // The following stuff does the equivalent of:
482  // if (neg) t = -t;
483  // Though this expression triggers a warning on VC++ when T is an
484  // unsigned type. This complex template stuff avoids the warning.
485  ConditionalNegate<IS_SIGNED> negateFunctor;
486  negateFunctor(neg, t);
487 }
489 {
490  string_view str = loadStr();
491  fastAtoi(str, i);
492 }
493 void XmlInputArchive::load(unsigned& u)
494 {
495  string_view str = loadStr();
496  fastAtoi(str, u);
497 }
498 void XmlInputArchive::load(unsigned long long& ull)
499 {
500  string_view str = loadStr();
501  fastAtoi(str, ull);
502 }
503 void XmlInputArchive::load(unsigned char& b)
504 {
505  unsigned i;
506  load(i);
507  b = i;
508 }
509 void XmlInputArchive::load(signed char& c)
510 {
511  int i;
512  load(i);
513  c = i;
514 }
516 {
517  int i;
518  load(i);
519  c = i;
520 }
521 
522 void XmlInputArchive::beginTag(const char* tag)
523 {
524  const auto* child = currentElement()->findChild(tag, elems.back().second);
525  if (!child) {
526  string path;
527  for (auto& e : elems) {
528  strAppend(path, e.first->getName(), '/');
529  }
530  throw XMLException("No child tag \"", tag,
531  "\" found at location \"", path, '\"');
532  }
533  elems.emplace_back(child, child->getFirstChild());
534 }
535 void XmlInputArchive::endTag(const char* tag)
536 {
537  const auto& elem = *currentElement();
538  if (elem.getName() != tag) {
539  throw XMLException("End tag \"", elem.getName(),
540  "\" not equal to begin tag \"", tag, "\"");
541  }
542  auto& elem2 = const_cast<XMLElement&>(elem);
543  elem2.clearName(); // mark this elem for later beginTag() calls
544  elems.pop_back();
545 }
546 
547 void XmlInputArchive::attribute(const char* name, string& t)
548 {
549  const auto* attr = currentElement()->findAttribute(name);
550  if (!attr) {
551  throw XMLException("Missing attribute \"", name, "\".");
552  }
553  t = attr->getValue();
554 }
555 void XmlInputArchive::attribute(const char* name, int& i)
556 {
557  attributeImpl(name, i);
558 }
559 void XmlInputArchive::attribute(const char* name, unsigned& u)
560 {
561  attributeImpl(name, u);
562 }
563 bool XmlInputArchive::hasAttribute(const char* name)
564 {
565  return currentElement()->findAttribute(name);
566 }
567 bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
568 {
569  if (const auto* attr = currentElement()->findAttribute(name)) {
570  if (auto r = StringOp::stringTo<int>(attr->getValue())) {
571  value = *r;
572  return true;
573  }
574  }
575  return false;
576 }
578 {
579  return int(currentElement()->numChildren());
580 }
581 
582 } // namespace openmsx
uintptr_t id
Definition: Interpreter.cc:26
TclObject t
bool contains(const K &k) const
Definition: hash_map.hh:110
iterator emplace_noDuplicateCheck(Args &&... args)
Definition: hash_set.hh:480
Definition: one_of.hh:7
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition: serialize.hh:234
void * getPointer(unsigned id)
Definition: serialize.cc:115
unsigned getId(const void *p) const
Definition: serialize.cc:127
void addPointer(unsigned id, const void *p)
Definition: serialize.cc:121
void serialize_blob(const char *tag, void *data, size_t len, bool diff=true)
Definition: serialize.cc:136
const uint8_t * getCurrentPos() const
Return a pointer to the current position in the buffer.
void skip(size_t len)
Skip the given number of bytes.
std::shared_ptr< DeltaBlock > createNullDiff(const void *id, const uint8_t *data, size_t size)
Definition: DeltaBlock.cc:396
std::shared_ptr< DeltaBlock > createNew(const void *id, const uint8_t *data, size_t size)
Definition: DeltaBlock.cc:360
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
std::string_view loadStr()
Definition: serialize.cc:201
void serialize_blob(const char *tag, void *data, size_t len, bool diff=true)
Definition: serialize.cc:237
void save(const T &t)
Definition: serialize.hh:659
MemBuffer< uint8_t > releaseBuffer(size_t &size)
Definition: serialize.cc:184
void serialize_blob(const char *tag, const void *data, size_t len, bool diff=true)
Definition: serialize.cc:218
void serialize_blob(const char *tag, const void *data, size_t len, bool diff=true)
Definition: serialize.cc:76
uint8_t * allocate(size_t len)
Reserve space to insert the given number of bytes.
MemBuffer< uint8_t > release(size_t &size)
Release ownership of the buffer.
unsigned size() const
Definition: TclObject.hh:167
static std::string full()
Definition: Version.cc:8
const XMLElement * getRoot() const
Definition: XMLElement.hh:272
void load(const std::string &filename, std::string_view systemID)
Definition: XMLElement.cc:320
const XMLAttribute * findAttribute(std::string_view attrName) const
Definition: XMLElement.cc:95
const XMLElement * findChild(std::string_view childName) const
Definition: XMLElement.cc:19
std::string_view getData() const
Definition: XMLElement.hh:176
void attribute(const char *name, T &t)
Definition: serialize.hh:981
bool findAttribute(const char *name, unsigned &value)
Definition: serialize.cc:567
const XMLElement * currentElement() const
Definition: serialize.hh:962
std::string_view loadStr()
Definition: serialize.cc:405
void endTag(const char *tag)
Definition: serialize.cc:535
int countChildren() const
Definition: serialize.cc:577
XmlInputArchive(const std::string &filename)
Definition: serialize.cc:398
void attributeImpl(const char *name, T &t)
Definition: serialize.hh:974
bool hasAttribute(const char *name)
Definition: serialize.cc:563
void beginTag(const char *tag)
Definition: serialize.cc:522
void loadChar(char &c)
Definition: serialize.cc:416
void saveImpl(const T &t)
Definition: serialize.hh:849
void check(bool condition) const
Definition: serialize.cc:323
void save(const T &t)
Definition: serialize.hh:855
XmlOutputArchive(zstring_view filename)
Definition: serialize.cc:259
void attributeImpl(const char *name, const T &t)
Definition: serialize.hh:893
void endTag(const char *tag)
Definition: serialize.cc:391
void beginTag(const char *tag)
Definition: serialize.cc:387
void attribute(const char *name, const T &t)
Definition: serialize.hh:897
void write(const char *buf, size_t len)
Definition: serialize.cc:309
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
Definition: zstring_view.hh:22
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition: hash_map.hh:118
#define unlikely(x)
Definition: likely.hh:15
bool decode_inplace(std::string_view input, uint8_t *output, size_t outSize)
Definition: Base64.cc:125
bool decode_inplace(std::string_view input, uint8_t *output, size_t outSize)
Definition: HexDump.cc:72
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:343
std::string toString(time_t time)
Definition: Date.cc:150
FILE_t openFile(zstring_view filename, zstring_view mode)
Call fopen() in a platform-independent manner.
This file implemented 3 utility functions:
Definition: Autofire.cc:9
constexpr size_t SMALL_SIZE
Definition: serialize.cc:217
constexpr const char *const filename
size_t size(std::string_view utf8)
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:669
void operator()(bool negate, T &)
Definition: serialize.cc:455
void operator()(bool negate, T &t)
Definition: serialize.cc:450