openMSX
serialize.cc
Go to the documentation of this file.
1 #include "serialize.hh"
2 #include "Base64.hh"
3 #include "HexDump.hh"
4 #include "XMLLoader.hh"
5 #include "XMLElement.hh"
6 #include "ConfigException.hh"
7 #include "XMLException.hh"
8 #include "DeltaBlock.hh"
9 #include "MemBuffer.hh"
10 #include "FileOperations.hh"
11 #include "Version.hh"
12 #include "Date.hh"
13 #include "stl.hh"
14 #include "cstdiop.hh" // for dup()
15 #include <cstring>
16 #include <iostream>
17 #include <limits>
18 
19 using std::string;
20 
21 namespace openmsx {
22 
23 template<typename Derived>
24 void ArchiveBase<Derived>::attribute(const char* name, const char* value)
25 {
26  string valueStr(value);
27  self().attribute(name, valueStr);
28 }
29 template class ArchiveBase<MemOutputArchive>;
30 template class ArchiveBase<XmlOutputArchive>;
31 
33 
34 unsigned OutputArchiveBase2::generateID1(const void* p)
35 {
36  #ifdef linux
37  assert("Can't serialize ID of object located on the stack" &&
38  !addressOnStack(p));
39  #endif
40  ++lastId;
41  assert(!polyIdMap.contains(p));
42  polyIdMap.emplace_noDuplicateCheck(p, lastId);
43  return lastId;
44 }
45 unsigned OutputArchiveBase2::generateID2(
46  const void* p, const std::type_info& typeInfo)
47 {
48  #ifdef linux
49  assert("Can't serialize ID of object located on the stack" &&
50  !addressOnStack(p));
51  #endif
52  ++lastId;
53  auto key = std::make_pair(p, std::type_index(typeInfo));
54  assert(!idMap.contains(key));
55  idMap.emplace_noDuplicateCheck(key, lastId);
56  return lastId;
57 }
58 
59 unsigned OutputArchiveBase2::getID1(const void* p)
60 {
61  auto v = lookup(polyIdMap, p);
62  return v ? *v : 0;
63 }
64 unsigned OutputArchiveBase2::getID2(
65  const void* p, const std::type_info& typeInfo)
66 {
67  auto v = lookup(idMap, std::make_pair(p, std::type_index(typeInfo)));
68  return v ? *v : 0;
69 }
70 
71 
72 template<typename Derived>
74  const char* tag, const void* data_, size_t len, bool /*diff*/)
75 {
76  auto* data = static_cast<const uint8_t*>(data_);
77 
78  string encoding;
79  string tmp;
80  if (false) {
81  // useful for debugging
82  encoding = "hex";
83  tmp = HexDump::encode(data, len);
84  } else if (false) {
85  encoding = "base64";
86  tmp = Base64::encode(data, len);
87  } else {
88  encoding = "gz-base64";
89  // TODO check for overflow?
90  auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
91  MemBuffer<uint8_t> buf(dstLen);
92  if (compress2(buf.data(), &dstLen,
93  reinterpret_cast<const Bytef*>(data),
94  uLong(len), 9)
95  != Z_OK) {
96  throw MSXException("Error while compressing blob.");
97  }
98  tmp = Base64::encode(buf.data(), dstLen);
99  }
100  this->self().beginTag(tag);
101  this->self().attribute("encoding", encoding);
102  Saver<string> saver;
103  saver(this->self(), tmp, false);
104  this->self().endTag(tag);
105 }
106 
109 
111 
113 {
114  auto v = lookup(idMap, id);
115  return v ? *v : nullptr;
116 }
117 
118 void InputArchiveBase2::addPointer(unsigned id, const void* p)
119 {
120  assert(!idMap.contains(id));
121  idMap.emplace_noDuplicateCheck(id, const_cast<void*>(p));
122 }
123 
124 unsigned InputArchiveBase2::getId(const void* ptr) const
125 {
126  for (const auto& p : idMap) {
127  if (p.second == ptr) return p.first;
128  }
129  return 0;
130 }
131 
132 template<typename Derived>
134  const char* tag, void* data, size_t len, bool /*diff*/)
135 {
136  this->self().beginTag(tag);
137  string encoding;
138  this->self().attribute("encoding", encoding);
139 
140  string_view tmp = this->self().loadStr();
141  this->self().endTag(tag);
142 
143  if (encoding == "gz-base64") {
144  auto p = Base64::decode(tmp);
145  auto dstLen = uLongf(len); // TODO check for overflow?
146  if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
147  reinterpret_cast<const Bytef*>(p.first.data()), uLong(p.second))
148  != Z_OK) ||
149  (dstLen != len)) {
150  throw MSXException("Error while decompressing blob.");
151  }
152  } else if ((encoding == "hex") || (encoding == "base64")) {
153  bool ok = (encoding == "hex")
154  ? HexDump::decode_inplace(tmp, static_cast<uint8_t*>(data), len)
155  : Base64 ::decode_inplace(tmp, static_cast<uint8_t*>(data), len);
156  if (!ok) {
157  throw XMLException(
158  "Length of decoded blob different from "
159  "expected value (", len, ')');
160  }
161  } else {
162  throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
163  }
164 }
165 
166 template class InputArchiveBase<MemInputArchive>;
167 template class InputArchiveBase<XmlInputArchive>;
168 
170 
171 void MemOutputArchive::save(const std::string& s)
172 {
173  auto size = s.size();
174  uint8_t* buf = buffer.allocate(sizeof(size) + size);
175  memcpy(buf, &size, sizeof(size));
176  memcpy(buf + sizeof(size), s.data(), size);
177 }
178 
180 {
181  return buffer.release(size);
182 }
183 
185 
186 void MemInputArchive::load(std::string& s)
187 {
188  size_t length;
189  load(length);
190  s.resize(length);
191  if (length) {
192  get(&s[0], length);
193  }
194 }
195 
197 {
198  size_t length;
199  load(length);
200  const uint8_t* p = buffer.getCurrentPos();
201  buffer.skip(length);
202  return string_view(reinterpret_cast<const char*>(p), length);
203 }
204 
206 
207 // Inputs that are too small don't compress very well (often the compressed
208 // size is even bigger than the input). Compressing them also takes relatively
209 // long (because compression often has a relatively large setup time). I chose
210 // this value semi-arbitrarily. I only made it >= 52 so that the
211 // (incompressible) RP5C01 registers won't be compressed.
212 static const size_t SMALL_SIZE = 64;
213 void MemOutputArchive::serialize_blob(const char* /*tag*/, const void* data,
214  size_t len, bool diff)
215 {
216  // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
217  if (len > SMALL_SIZE) {
218  auto deltaBlockIdx = unsigned(deltaBlocks.size());
219  save(deltaBlockIdx); // see comment below in MemInputArchive
220  deltaBlocks.push_back(diff
221  ? lastDeltaBlocks.createNew(
222  data, static_cast<const uint8_t*>(data), len)
223  : lastDeltaBlocks.createNullDiff(
224  data, static_cast<const uint8_t*>(data), len));
225  } else {
226  uint8_t* buf = buffer.allocate(len);
227  memcpy(buf, data, len);
228  }
229 
230 }
231 
232 void MemInputArchive::serialize_blob(const char* /*tag*/, void* data,
233  size_t len, bool /*diff*/)
234 {
235  if (len > SMALL_SIZE) {
236  // Usually blobs are saved in the same order as they are loaded
237  // (via the serialize_blob() methods in respectively
238  // MemOutputArchive and MemInputArchive). In that case keeping
239  // track of the deltaBlockIdx in the savestate itself is
240  // redundant (it will simply be an increasing value). However
241  // in rare cases, via the {begin,end,skip)Section() methods, it
242  // is possible that certain blobs are stored in the savestate,
243  // but skipped while loading. That's why we do need the index.
244  unsigned deltaBlockIdx; load(deltaBlockIdx);
245  deltaBlocks[deltaBlockIdx]->apply(static_cast<uint8_t*>(data), len);
246  } else {
247  memcpy(data, buffer.getCurrentPos(), len);
248  buffer.skip(len);
249  }
250 }
251 
253 
254 XmlOutputArchive::XmlOutputArchive(const string& filename)
255  : root("serial")
256 {
257  root.addAttribute("openmsx_version", Version::full());
258  root.addAttribute("date_time", Date::toString(time(nullptr)));
259  root.addAttribute("platform", TARGET_PLATFORM);
260  {
261  auto f = FileOperations::openFile(filename, "wb");
262  if (!f) goto error;
263  int duped_fd = dup(fileno(f.get()));
264  if (duped_fd == -1) goto error;
265  file = gzdopen(duped_fd, "wb9");
266  if (!file) {
267  ::close(duped_fd);
268  goto error;
269  }
270  current.push_back(&root);
271  return; // success
272  // on scope-exit 'File* f' is closed, and 'gzFile file'
273  // uses the dup()'ed file descriptor.
274  }
275 
276 error:
277  throw XMLException("Could not open compressed file \"", filename, "\"");
278 }
279 
281 {
282  if (!file) return; // already closed
283 
284  assert(current.back() == &root);
285  const char* header =
286  "<?xml version=\"1.0\" ?>\n"
287  "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
288  string dump = root.dump();
289  if ((gzwrite(file, const_cast<char*>(header), unsigned(strlen(header))) == 0) ||
290  (gzwrite(file, const_cast<char*>(dump.data()), unsigned(dump.size())) == 0) ||
291  (gzclose(file) != Z_OK)) {
292  throw XMLException("Could not write savestate file.");
293  }
294 
295  file = nullptr;
296 }
297 
299 {
300  try {
301  close();
302  } catch (...) {
303  // Eat exception. Explicitly call close() if you want to handle errors.
304  }
305 }
306 
308 {
309  save(string(1, c));
310 }
311 void XmlOutputArchive::save(const string& str)
312 {
313  assert(!current.empty());
314  assert(current.back()->getData().empty());
315  current.back()->setData(str);
316 }
318 {
319  assert(!current.empty());
320  assert(current.back()->getData().empty());
321  current.back()->setData(b ? "true" : "false");
322 }
323 void XmlOutputArchive::save(unsigned char b)
324 {
325  save(unsigned(b));
326 }
327 void XmlOutputArchive::save(signed char c)
328 {
329  save(int(c));
330 }
332 {
333  save(int(c));
334 }
336 {
337  saveImpl(i);
338 }
339 void XmlOutputArchive::save(unsigned u)
340 {
341  saveImpl(u);
342 }
343 void XmlOutputArchive::save(unsigned long long ull)
344 {
345  saveImpl(ull);
346 }
347 
348 void XmlOutputArchive::attribute(const char* name, const string& str)
349 {
350  assert(!current.empty());
351  assert(!current.back()->hasAttribute(name));
352  current.back()->addAttribute(name, str);
353 }
354 void XmlOutputArchive::attribute(const char* name, int i)
355 {
356  attributeImpl(name, i);
357 }
358 void XmlOutputArchive::attribute(const char* name, unsigned u)
359 {
360  attributeImpl(name, u);
361 }
362 
363 void XmlOutputArchive::beginTag(const char* tag)
364 {
365  assert(!current.empty());
366  auto& elem = current.back()->addChild(tag);
367  current.push_back(&elem);
368 }
369 void XmlOutputArchive::endTag(const char* tag)
370 {
371  assert(!current.empty());
372  assert(current.back()->getName() == tag); (void)tag;
373  current.pop_back();
374 }
375 
377 
378 XmlInputArchive::XmlInputArchive(const string& filename)
379  : rootElem(XMLLoader::load(filename, "openmsx-serialize.dtd"))
380 {
381  elems.emplace_back(&rootElem, 0);
382 }
383 
385 {
386  if (!elems.back().first->getChildren().empty()) {
387  throw XMLException("No child tags expected for primitive type");
388  }
389  return elems.back().first->getData();
390 }
392 {
393  t = loadStr().str();
394 }
396 {
397  std::string str;
398  load(str);
399  std::istringstream is(str);
400  is >> c;
401 }
403 {
404  string_view s = loadStr();
405  if ((s == "true") || (s == "1")) {
406  b = true;
407  } else if ((s == "false") || (s == "0")) {
408  b = false;
409  } else {
410  throw XMLException("Bad value found for boolean: ", s);
411  }
412 }
413 
414 // This function parses a number from a string. It's similar to the generic
415 // templatized XmlInputArchive::load() method, but _much_ faster. It does
416 // have some limitations though:
417 // - it can't handle leading whitespace
418 // - it can't handle extra characters at the end of the string
419 // - it can only handle one base (only decimal, not octal or hexadecimal)
420 // - it doesn't understand a leading '+' sign
421 // - it doesn't detect overflow or underflow (The generic implementation sets
422 // a 'bad' flag on the stream and clips the result to the min/max allowed
423 // value. Though this 'bad' flag was ignored by the openMSX code).
424 // This routine is only used to parse strings we've written ourselves (and the
425 // savestate/replay XML files are not meant to be manually edited). So the
426 // above limitations don't really matter. And we can use the speed gain.
// Helper functor that performs 'if (negate) t = -t;', but only contains
// the negation for signed types. Selected via the IS_SIGNED parameter.
template<bool IS_SIGNED> struct ConditionalNegate;

// Signed variant: negating is allowed.
template<> struct ConditionalNegate<true> {
	template<typename T> void operator()(bool negate, T& t) {
		if (!negate) return;
		t = -t; // ok to negate a signed type
	}
};

// Unsigned variant: the value is left untouched, a request to negate is
// a programming error.
template<> struct ConditionalNegate<false> {
	template<typename T> void operator()(bool negate, T& /*t*/) {
		(void)negate; // only referenced inside the assert below
		assert(!negate); // can't negate unsigned type
	}
};
438 template<typename T> static inline void fastAtoi(string_view str, T& t)
439 {
440  t = 0;
441  bool neg = false;
442  size_t i = 0;
443  size_t l = str.size();
444 
445  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
446  if (IS_SIGNED) {
447  if (l == 0) return;
448  if (str[0] == '-') {
449  neg = true;
450  i = 1;
451  }
452  }
453  for (; i < l; ++i) {
454  unsigned d = str[i] - '0';
455  if (unlikely(d > 9)) {
456  throw XMLException("Invalid integer: ", str);
457  }
458  t = 10 * t + d;
459  }
460  // The following stuff does the equivalent of:
461  // if (neg) t = -t;
462  // Though this expression triggers a warning on VC++ when T is an
463  // unsigned type. This complex template stuff avoids the warning.
464  ConditionalNegate<IS_SIGNED> negateFunctor;
465  negateFunctor(neg, t);
466 }
468 {
469  string_view str = loadStr();
470  fastAtoi(str, i);
471 }
472 void XmlInputArchive::load(unsigned& u)
473 {
474  string_view str = loadStr();
475  fastAtoi(str, u);
476 }
477 void XmlInputArchive::load(unsigned long long& ull)
478 {
479  string_view str = loadStr();
480  fastAtoi(str, ull);
481 }
482 void XmlInputArchive::load(unsigned char& b)
483 {
484  unsigned i;
485  load(i);
486  b = i;
487 }
488 void XmlInputArchive::load(signed char& c)
489 {
490  int i;
491  load(i);
492  c = i;
493 }
495 {
496  int i;
497  load(i);
498  c = i;
499 }
500 
501 void XmlInputArchive::beginTag(const char* tag)
502 {
503  auto* child = elems.back().first->findNextChild(
504  tag, elems.back().second);
505  if (!child) {
506  string path;
507  for (auto& e : elems) {
508  strAppend(path, e.first->getName(), '/');
509  }
510  throw XMLException("No child tag \"", tag,
511  "\" found at location \"", path, '\"');
512  }
513  elems.emplace_back(child, 0);
514 }
515 void XmlInputArchive::endTag(const char* tag)
516 {
517  const auto& elem = *elems.back().first;
518  if (elem.getName() != tag) {
519  throw XMLException("End tag \"", elem.getName(),
520  "\" not equal to begin tag \"", tag, "\"");
521  }
522  auto& elem2 = const_cast<XMLElement&>(elem);
523  elem2.clearName(); // mark this elem for later beginTag() calls
524  elems.pop_back();
525 }
526 
527 void XmlInputArchive::attribute(const char* name, string& t)
528 {
529  try {
530  t = elems.back().first->getAttribute(name);
531  } catch (ConfigException& e) {
532  throw XMLException(std::move(e).getMessage());
533  }
534 }
535 void XmlInputArchive::attribute(const char* name, int& i)
536 {
537  attributeImpl(name, i);
538 }
539 void XmlInputArchive::attribute(const char* name, unsigned& u)
540 {
541  attributeImpl(name, u);
542 }
543 bool XmlInputArchive::hasAttribute(const char* name)
544 {
545  return elems.back().first->hasAttribute(name);
546 }
547 bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
548 {
549  return elems.back().first->findAttributeInt(name, value);
550 }
552 {
553  return int(elems.back().first->getChildren().size());
554 }
555 
556 } // namespace openmsx
bool contains(const K &k) const
Definition: hash_map.hh:83
void attribute(const char *name, T &t)
Definition: serialize.hh:989
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:343
bool findAttribute(const char *name, unsigned &value)
Definition: serialize.cc:547
void operator()(bool negate, T &t)
Definition: serialize.cc:429
#define unlikely(x)
Definition: likely.hh:15
void loadChar(char &c)
Definition: serialize.cc:395
MemBuffer< uint8_t > releaseBuffer(size_t &size)
Definition: serialize.cc:179
XmlInputArchive(const std::string &filename)
Definition: serialize.cc:378
void * getPointer(unsigned id)
Definition: serialize.cc:112
void serialize_blob(const char *tag, void *data, size_t len, bool diff=true)
Definition: serialize.cc:133
void save(const T &t)
Definition: serialize.hh:655
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition: hash_map.hh:91
std::string dump() const
Definition: XMLElement.cc:252
unsigned getId(const void *p) const
Definition: serialize.cc:124
void endTag(const char *tag)
Definition: serialize.cc:515
void addPointer(unsigned id, const void *p)
Definition: serialize.cc:118
string_view loadStr()
Definition: serialize.cc:384
bool hasAttribute(const char *name)
Definition: serialize.cc:543
void strAppend(std::string &result, Ts &&...ts)
Definition: strCat.hh:648
constexpr auto data(C &c) -> decltype(c.data())
Definition: span.hh:69
int countChildren() const
Definition: serialize.cc:551
bool decode_inplace(string_view input, uint8_t *output, size_t outSize)
Definition: Base64.cc:125
void attribute(const char *name, const T &t)
Definition: serialize.hh:915
void endTag(const char *tag)
Definition: serialize.cc:369
void attributeImpl(const char *name, const T &t)
Definition: serialize.hh:911
constexpr size_t strlen(const char *s) noexcept
Definition: cstd.hh:135
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition: serialize.hh:231
void save(const T &t)
Definition: serialize.hh:875
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:90
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
XMLElement load(string_view filename, string_view systemID)
Definition: XMLLoader.cc:31
bool decode_inplace(string_view input, uint8_t *output, size_t outSize)
Definition: HexDump.cc:72
string_view loadStr()
Definition: serialize.cc:196
void serialize_blob(const char *tag, void *data, size_t len, bool diff=true)
Definition: serialize.cc:232
This class implements a (close approximation) of the std::string_view class.
Definition: string_view.hh:16
void serialize_blob(const char *tag, const void *data, size_t len, bool diff=true)
Definition: serialize.cc:213
std::string str() const
Definition: string_view.cc:12
void operator()(bool negate, T &)
Definition: serialize.cc:434
void serialize_blob(const char *tag, const void *data, size_t len, bool diff=true)
Definition: serialize.cc:73
void beginTag(const char *tag)
Definition: serialize.cc:501
iterator emplace_noDuplicateCheck(Args &&... args)
Definition: hash_set.hh:487
static std::string full()
Definition: Version.cc:8
FILE_t openFile(const std::string &filename, const std::string &mode)
Call fopen() in a platform-independent manner.
void saveImpl(const T &t)
Definition: serialize.hh:869
XmlOutputArchive(const std::string &filename)
Definition: serialize.cc:254
size_type size() const
Definition: string_view.hh:44
void addAttribute(std::string name, std::string value)
Definition: XMLElement.cc:53
void beginTag(const char *tag)
Definition: serialize.cc:363
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.hh:62
TclObject t
void attributeImpl(const char *name, T &t)
Definition: serialize.hh:982