openMSX
serialize.cc
Go to the documentation of this file.
#include "serialize.hh"
#include "Base64.hh"
#include "HexDump.hh"
#include "XMLLoader.hh"
#include "XMLElement.hh"
#include "ConfigException.hh"
#include "XMLException.hh"
#include "DeltaBlock.hh"
#include "MemBuffer.hh"
#include "FileOperations.hh"
#include "Version.hh"
#include "Date.hh"
#include "stl.hh"
#include "cstdiop.hh" // for dup()
#include <cstring>
#include <limits>
#include <type_traits>
17 
18 using std::string;
19 
20 namespace openmsx {
21 
22 template<typename Derived>
23 void ArchiveBase<Derived>::attribute(const char* name, const char* value)
24 {
25  string valueStr(value);
26  self().attribute(name, valueStr);
27 }
28 template class ArchiveBase<MemOutputArchive>;
29 template class ArchiveBase<XmlOutputArchive>;
30 
32 
33 unsigned OutputArchiveBase2::generateID1(const void* p)
34 {
35  #ifdef linux
36  assert("Can't serialize ID of object located on the stack" &&
37  !addressOnStack(p));
38  #endif
39  ++lastId;
40  assert(!polyIdMap.count(p)); // c++20 contains()
41  polyIdMap[p] = lastId;
42  return lastId;
43 }
44 unsigned OutputArchiveBase2::generateID2(
45  const void* p, const std::type_info& typeInfo)
46 {
47  #ifdef linux
48  assert("Can't serialize ID of object located on the stack" &&
49  !addressOnStack(p));
50  #endif
51  ++lastId;
52  auto key = std::make_pair(p, std::type_index(typeInfo));
53  assert(!idMap.count(key)); // c++20 contains()
54  idMap[key] = lastId;
55  return lastId;
56 }
57 
58 unsigned OutputArchiveBase2::getID1(const void* p)
59 {
60  auto v = lookup(polyIdMap, p);
61  return v ? *v : 0;
62 }
63 unsigned OutputArchiveBase2::getID2(
64  const void* p, const std::type_info& typeInfo)
65 {
66  auto v = lookup(idMap, std::make_pair(p, std::type_index(typeInfo)));
67  return v ? *v : 0;
68 }
69 
70 
71 template<typename Derived>
73  const char* tag, const void* data_, size_t len, bool /*diff*/)
74 {
75  auto* data = static_cast<const uint8_t*>(data_);
76 
77  string encoding;
78  string tmp;
79  if (false) {
80  // useful for debugging
81  encoding = "hex";
82  tmp = HexDump::encode(data, len);
83  } else if (false) {
84  encoding = "base64";
85  tmp = Base64::encode(data, len);
86  } else {
87  encoding = "gz-base64";
88  // TODO check for overflow?
89  auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
90  MemBuffer<byte> buf(dstLen);
91  if (compress2(buf.data(), &dstLen,
92  reinterpret_cast<const Bytef*>(data),
93  uLong(len), 9)
94  != Z_OK) {
95  throw MSXException("Error while compressing blob.");
96  }
97  tmp = Base64::encode(buf.data(), dstLen);
98  }
99  this->self().beginTag(tag);
100  this->self().attribute("encoding", encoding);
101  Saver<string> saver;
102  saver(this->self(), tmp, false);
103  this->self().endTag(tag);
104 }
105 
108 
110 
112 {
113  auto v = lookup(idMap, id);
114  return v ? *v : nullptr;
115 }
116 
117 void InputArchiveBase2::addPointer(unsigned id, const void* p)
118 {
119  assert(!idMap.count(id)); // c++20 contains()
120  idMap[id] = const_cast<void*>(p);
121 }
122 
123 unsigned InputArchiveBase2::getId(const void* ptr) const
124 {
125  for (const auto& p : idMap) {
126  if (p.second == ptr) return p.first;
127  }
128  return 0;
129 }
130 
131 template<typename Derived>
133  const char* tag, void* data, size_t len, bool /*diff*/)
134 {
135  this->self().beginTag(tag);
136  string encoding;
137  this->self().attribute("encoding", encoding);
138 
139  string_view tmp = this->self().loadStr();
140  this->self().endTag(tag);
141 
142  if (encoding == "gz-base64") {
143  auto p = Base64::decode(tmp);
144  auto dstLen = uLongf(len); // TODO check for overflow?
145  if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
146  reinterpret_cast<const Bytef*>(p.first.data()), uLong(p.second))
147  != Z_OK) ||
148  (dstLen != len)) {
149  throw MSXException("Error while decompressing blob.");
150  }
151  } else if ((encoding == "hex") || (encoding == "base64")) {
152  bool ok = (encoding == "hex")
153  ? HexDump::decode_inplace(tmp, static_cast<uint8_t*>(data), len)
154  : Base64 ::decode_inplace(tmp, static_cast<uint8_t*>(data), len);
155  if (!ok) {
156  throw XMLException(
157  "Length of decoded blob different from "
158  "expected value (", len, ')');
159  }
160  } else {
161  throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
162  }
163 }
164 
165 template class InputArchiveBase<MemInputArchive>;
166 template class InputArchiveBase<XmlInputArchive>;
167 
169 
170 void MemOutputArchive::save(const std::string& s)
171 {
172  auto size = s.size();
173  byte* buf = buffer.allocate(sizeof(size) + size);
174  memcpy(buf, &size, sizeof(size));
175  memcpy(buf + sizeof(size), s.data(), size);
176 }
177 
179 {
180  return buffer.release(size);
181 }
182 
184 
185 void MemInputArchive::load(std::string& s)
186 {
187  size_t length;
188  load(length);
189  s.resize(length);
190  if (length) {
191  get(&s[0], length);
192  }
193 }
194 
196 {
197  size_t length;
198  load(length);
199  const byte* p = buffer.getCurrentPos();
200  buffer.skip(length);
201  return string_view(reinterpret_cast<const char*>(p), length);
202 }
203 
205 
// Very small inputs don't compress well (often the compressed size is even
// bigger than the input). Compressing them is also relatively slow (because
// compression often has a relatively large setup time). This value was chosen
// semi-arbitrarily; it was only made >= 52 so that the (incompressible) RP5C01
// registers won't be compressed.
211 static const size_t SMALL_SIZE = 64;
212 void MemOutputArchive::serialize_blob(const char* /*tag*/, const void* data,
213  size_t len, bool diff)
214 {
215  // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
216  if (len > SMALL_SIZE) {
217  auto deltaBlockIdx = unsigned(deltaBlocks.size());
218  save(deltaBlockIdx); // see comment below in MemInputArchive
219  deltaBlocks.push_back(diff
220  ? lastDeltaBlocks.createNew(
221  data, static_cast<const uint8_t*>(data), len)
222  : lastDeltaBlocks.createNullDiff(
223  data, static_cast<const uint8_t*>(data), len));
224  } else {
225  byte* buf = buffer.allocate(len);
226  memcpy(buf, data, len);
227  }
228 
229 }
230 
231 void MemInputArchive::serialize_blob(const char* /*tag*/, void* data,
232  size_t len, bool /*diff*/)
233 {
234  if (len > SMALL_SIZE) {
235  // Usually blobs are saved in the same order as they are loaded
236  // (via the serialize_blob() methods in respectively
237  // MemOutputArchive and MemInputArchive). In that case keeping
238  // track of the deltaBlockIdx in the savestate itself is
239  // redundant (it will simply be an increasing value). However
240  // in rare cases, via the {begin,end,skip)Section() methods, it
241  // is possible that certain blobs are stored in the savestate,
242  // but skipped while loading. That's why we do need the index.
243  unsigned deltaBlockIdx; load(deltaBlockIdx);
244  deltaBlocks[deltaBlockIdx]->apply(static_cast<uint8_t*>(data), len);
245  } else {
246  memcpy(data, buffer.getCurrentPos(), len);
247  buffer.skip(len);
248  }
249 }
250 
252 
253 XmlOutputArchive::XmlOutputArchive(const string& filename)
254  : root("serial")
255 {
256  root.addAttribute("openmsx_version", Version::full());
257  root.addAttribute("date_time", Date::toString(time(nullptr)));
258  root.addAttribute("platform", TARGET_PLATFORM);
259  {
260  auto f = FileOperations::openFile(filename, "wb");
261  if (!f) goto error;
262  int duped_fd = dup(fileno(f.get()));
263  if (duped_fd == -1) goto error;
264  file = gzdopen(duped_fd, "wb9");
265  if (!file) {
266  close(duped_fd);
267  goto error;
268  }
269  current.push_back(&root);
270  return; // success
271  // on scope-exit 'File* f' is closed, and 'gzFile file'
272  // uses the dup()'ed file descriptor.
273  }
274 
275 error:
276  throw XMLException("Could not open compressed file \"", filename, "\"");
277 }
278 
280 {
281  assert(current.back() == &root);
282  const char* header =
283  "<?xml version=\"1.0\" ?>\n"
284  "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
285  gzwrite(file, const_cast<char*>(header), unsigned(strlen(header)));
286  string dump = root.dump();
287  gzwrite(file, const_cast<char*>(dump.data()), unsigned(dump.size()));
288  gzclose(file);
289 }
290 
292 {
293  save(string(1, c));
294 }
295 void XmlOutputArchive::save(const string& str)
296 {
297  assert(!current.empty());
298  assert(current.back()->getData().empty());
299  current.back()->setData(str);
300 }
302 {
303  assert(!current.empty());
304  assert(current.back()->getData().empty());
305  current.back()->setData(b ? "true" : "false");
306 }
307 void XmlOutputArchive::save(unsigned char b)
308 {
309  save(unsigned(b));
310 }
311 void XmlOutputArchive::save(signed char c)
312 {
313  save(int(c));
314 }
316 {
317  save(int(c));
318 }
320 {
321  saveImpl(i);
322 }
323 void XmlOutputArchive::save(unsigned u)
324 {
325  saveImpl(u);
326 }
327 void XmlOutputArchive::save(unsigned long long ull)
328 {
329  saveImpl(ull);
330 }
331 
332 void XmlOutputArchive::attribute(const char* name, const string& str)
333 {
334  assert(!current.empty());
335  assert(!current.back()->hasAttribute(name));
336  current.back()->addAttribute(name, str);
337 }
338 void XmlOutputArchive::attribute(const char* name, int i)
339 {
340  attributeImpl(name, i);
341 }
342 void XmlOutputArchive::attribute(const char* name, unsigned u)
343 {
344  attributeImpl(name, u);
345 }
346 
347 void XmlOutputArchive::beginTag(const char* tag)
348 {
349  assert(!current.empty());
350  auto& elem = current.back()->addChild(tag);
351  current.push_back(&elem);
352 }
353 void XmlOutputArchive::endTag(const char* tag)
354 {
355  assert(!current.empty());
356  assert(current.back()->getName() == tag); (void)tag;
357  current.pop_back();
358 }
359 
361 
362 XmlInputArchive::XmlInputArchive(const string& filename)
363  : rootElem(XMLLoader::load(filename, "openmsx-serialize.dtd"))
364 {
365  elems.emplace_back(&rootElem, 0);
366 }
367 
369 {
370  if (!elems.back().first->getChildren().empty()) {
371  throw XMLException("No child tags expected for primitive type");
372  }
373  return elems.back().first->getData();
374 }
376 {
377  t = loadStr().str();
378 }
380 {
381  std::string str;
382  load(str);
383  std::istringstream is(str);
384  is >> c;
385 }
387 {
388  string_view s = loadStr();
389  if ((s == "true") || (s == "1")) {
390  b = true;
391  } else if ((s == "false") || (s == "0")) {
392  b = false;
393  } else {
394  throw XMLException("Bad value found for boolean: ", s);
395  }
396 }
397 
398 // This function parses a number from a string. It's similar to the generic
399 // templatized XmlInputArchive::load() method, but _much_ faster. It does
400 // have some limitations though:
401 // - it can't handle leading whitespace
402 // - it can't handle extra characters at the end of the string
403 // - it can only handle one base (only decimal, not octal or hexadecimal)
404 // - it doesn't understand a leading '+' sign
405 // - it doesn't detect overflow or underflow (The generic implementation sets
406 // a 'bad' flag on the stream and clips the result to the min/max allowed
407 // value. Though this 'bad' flag was ignored by the openMSX code).
408 // This routine is only used to parse strings we've written ourselves (and the
409 // savestate/replay XML files are not meant to be manually edited). So the
410 // above limitations don't really matter. And we can use the speed gain.
411 template<bool IS_SIGNED> struct ConditionalNegate;
412 template<> struct ConditionalNegate<true> {
413  template<typename T> void operator()(bool negate, T& t) {
414  if (negate) t = -t; // ok to negate a signed type
415  }
416 };
417 template<> struct ConditionalNegate<false> {
418  template<typename T> void operator()(bool negate, T& /*t*/) {
419  assert(!negate); (void)negate; // can't negate unsigned type
420  }
421 };
422 template<typename T> static inline void fastAtoi(string_view str, T& t)
423 {
424  t = 0;
425  bool neg = false;
426  size_t i = 0;
427  size_t l = str.size();
428 
429  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
430  if (IS_SIGNED) {
431  if (l == 0) return;
432  if (str[0] == '-') {
433  neg = true;
434  i = 1;
435  }
436  }
437  for (; i < l; ++i) {
438  unsigned d = str[i] - '0';
439  if (unlikely(d > 9)) {
440  throw XMLException("Invalid integer: ", str);
441  }
442  t = 10 * t + d;
443  }
444  // The following stuff does the equivalent of:
445  // if (neg) t = -t;
446  // Though this expression triggers a warning on VC++ when T is an
447  // unsigned type. This complex template stuff avoids the warning.
448  ConditionalNegate<IS_SIGNED> negateFunctor;
449  negateFunctor(neg, t);
450 }
452 {
453  string_view str = loadStr();
454  fastAtoi(str, i);
455 }
456 void XmlInputArchive::load(unsigned& u)
457 {
458  string_view str = loadStr();
459  fastAtoi(str, u);
460 }
461 void XmlInputArchive::load(unsigned long long& ull)
462 {
463  string_view str = loadStr();
464  fastAtoi(str, ull);
465 }
466 void XmlInputArchive::load(unsigned char& b)
467 {
468  unsigned i;
469  load(i);
470  b = i;
471 }
472 void XmlInputArchive::load(signed char& c)
473 {
474  int i;
475  load(i);
476  c = i;
477 }
479 {
480  int i;
481  load(i);
482  c = i;
483 }
484 
485 void XmlInputArchive::beginTag(const char* tag)
486 {
487  auto* child = elems.back().first->findNextChild(
488  tag, elems.back().second);
489  if (!child) {
490  string path;
491  for (auto& e : elems) {
492  strAppend(path, e.first->getName(), '/');
493  }
494  throw XMLException("No child tag \"", tag,
495  "\" found at location \"", path, '\"');
496  }
497  elems.emplace_back(child, 0);
498 }
499 void XmlInputArchive::endTag(const char* tag)
500 {
501  const auto& elem = *elems.back().first;
502  if (elem.getName() != tag) {
503  throw XMLException("End tag \"", elem.getName(),
504  "\" not equal to begin tag \"", tag, "\"");
505  }
506  auto& elem2 = const_cast<XMLElement&>(elem);
507  elem2.clearName(); // mark this elem for later beginTag() calls
508  elems.pop_back();
509 }
510 
511 void XmlInputArchive::attribute(const char* name, string& t)
512 {
513  try {
514  t = elems.back().first->getAttribute(name);
515  } catch (ConfigException& e) {
516  throw XMLException(std::move(e).getMessage());
517  }
518 }
519 void XmlInputArchive::attribute(const char* name, int& i)
520 {
521  attributeImpl(name, i);
522 }
523 void XmlInputArchive::attribute(const char* name, unsigned& u)
524 {
525  attributeImpl(name, u);
526 }
527 bool XmlInputArchive::hasAttribute(const char* name)
528 {
529  return elems.back().first->hasAttribute(name);
530 }
531 bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
532 {
533  return elems.back().first->findAttributeInt(name, value);
534 }
536 {
537  return int(elems.back().first->getChildren().size());
538 }
539 
540 } // namespace openmsx
void attribute(const char *name, T &t)
Definition: serialize.hh:823
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:343
bool findAttribute(const char *name, unsigned &value)
Definition: serialize.cc:531
void operator()(bool negate, T &t)
Definition: serialize.cc:413
#define unlikely(x)
Definition: likely.hh:15
void loadChar(char &c)
Definition: serialize.cc:379
XmlInputArchive(const std::string &filename)
Definition: serialize.cc:362
void * getPointer(unsigned id)
Definition: serialize.cc:111
void serialize_blob(const char *tag, void *data, size_t len, bool diff=true)
Definition: serialize.cc:132
void save(const T &t)
Definition: serialize.hh:616
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition: hash_map.hh:91
uint8_t byte
8 bit unsigned integer
Definition: openmsx.hh:26
std::string dump() const
Definition: XMLElement.cc:275
unsigned getId(const void *p) const
Definition: serialize.cc:123
void endTag(const char *tag)
Definition: serialize.cc:499
void addPointer(unsigned id, const void *p)
Definition: serialize.cc:117
MemBuffer< byte > releaseBuffer(size_t &size)
Definition: serialize.cc:178
string_view loadStr()
Definition: serialize.cc:368
bool hasAttribute(const char *name)
Definition: serialize.cc:527
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:648
constexpr auto data(C &c) -> decltype(c.data())
Definition: span.hh:69
int countChildren() const
Definition: serialize.cc:535
bool decode_inplace(string_view input, uint8_t *output, size_t outSize)
Definition: Base64.cc:125
void addAttribute(string_view name, string_view value)
Definition: XMLElement.cc:58
void attribute(const char *name, const T &t)
Definition: serialize.hh:760
void endTag(const char *tag)
Definition: serialize.cc:353
void attributeImpl(const char *name, const T &t)
Definition: serialize.hh:756
constexpr size_t strlen(const char *s) noexcept
Definition: cstd.hh:135
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition: serialize.hh:212
void save(const T &t)
Definition: serialize.hh:731
void uncompress(const char *input, size_t inLen, char *output, size_t outLen)
Definition: snappy.cc:166
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:90
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
XMLElement load(string_view filename, string_view systemID)
Definition: XMLLoader.cc:31
bool decode_inplace(string_view input, uint8_t *output, size_t outSize)
Definition: HexDump.cc:72
string_view loadStr()
Definition: serialize.cc:195
void serialize_blob(const char *tag, void *data, size_t len, bool diff=true)
Definition: serialize.cc:231
This class implements a (close approximation) of the std::string_view class.
Definition: string_view.hh:16
void serialize_blob(const char *tag, const void *data, size_t len, bool diff=true)
Definition: serialize.cc:212
std::string str() const
Definition: string_view.cc:12
void operator()(bool negate, T &)
Definition: serialize.cc:418
void serialize_blob(const char *tag, const void *data, size_t len, bool diff=true)
Definition: serialize.cc:72
void beginTag(const char *tag)
Definition: serialize.cc:485
static std::string full()
Definition: Version.cc:8
FILE_t openFile(const std::string &filename, const std::string &mode)
Call fopen() in a platform-independent manner.
void saveImpl(const T &t)
Definition: serialize.hh:725
XmlOutputArchive(const std::string &filename)
Definition: serialize.cc:253
size_type size() const
Definition: string_view.hh:44
void beginTag(const char *tag)
Definition: serialize.cc:347
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.hh:62
TclObject t
void attributeImpl(const char *name, T &t)
Definition: serialize.hh:816