openMSX
serialize.cc
Go to the documentation of this file.
1 #include "serialize.hh"
2 #include "Base64.hh"
3 #include "HexDump.hh"
4 #include "XMLLoader.hh"
5 #include "XMLElement.hh"
6 #include "ConfigException.hh"
7 #include "XMLException.hh"
8 #include "snappy.hh"
9 #include "MemBuffer.hh"
10 #include "StringOp.hh"
11 #include "FileOperations.hh"
12 #include "Version.hh"
13 #include "Date.hh"
14 #include "cstdiop.hh" // for dup()
15 #include <cstring>
16 #include <limits>
17 
18 using std::string;
19 
20 namespace openmsx {
21 
22 template<typename Derived>
23 void ArchiveBase<Derived>::attribute(const char* name, const char* value)
24 {
25  string valueStr(value);
26  self().attribute(name, valueStr);
27 }
28 template class ArchiveBase<MemOutputArchive>;
29 template class ArchiveBase<XmlOutputArchive>;
30 
32 
34  : lastId(0)
35 {
36 }
37 
38 unsigned OutputArchiveBase2::generateID1(const void* p)
39 {
40  #ifdef linux
41  assert("Can't serialize ID of object located on the stack" &&
42  !addressOnStack(p));
43  #endif
44  ++lastId;
45  assert(polyIdMap.find(p) == end(polyIdMap));
46  polyIdMap[p] = lastId;
47  return lastId;
48 }
49 unsigned OutputArchiveBase2::generateID2(
50  const void* p, const std::type_info& typeInfo)
51 {
52  #ifdef linux
53  assert("Can't serialize ID of object located on the stack" &&
54  !addressOnStack(p));
55  #endif
56  ++lastId;
57  auto key = std::make_pair(p, std::type_index(typeInfo));
58  assert(idMap.find(key) == end(idMap));
59  idMap[key] = lastId;
60  return lastId;
61 }
62 
63 unsigned OutputArchiveBase2::getID1(const void* p)
64 {
65  auto it = polyIdMap.find(p);
66  return it != end(polyIdMap) ? it->second : 0;
67 }
68 unsigned OutputArchiveBase2::getID2(
69  const void* p, const std::type_info& typeInfo)
70 {
71  auto it = idMap.find({p, std::type_index(typeInfo)});
72  return it != end(idMap) ? it->second : 0;
73 }
74 
75 
76 template<typename Derived>
78  const char* tag, const void* data_, size_t len)
79 {
80  auto* data = static_cast<const uint8_t*>(data_);
81 
82  string encoding;
83  string tmp;
84  if (false) {
85  // useful for debugging
86  encoding = "hex";
87  tmp = HexDump::encode(data, len);
88  } else if (false) {
89  encoding = "base64";
90  tmp = Base64::encode(data, len);
91  } else {
92  encoding = "gz-base64";
93  // TODO check for overflow?
94  auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
95  MemBuffer<byte> buf(dstLen);
96  if (compress2(buf.data(), &dstLen,
97  reinterpret_cast<const Bytef*>(data),
98  uLong(len), 9)
99  != Z_OK) {
100  throw MSXException("Error while compressing blob.");
101  }
102  tmp = Base64::encode(buf.data(), dstLen);
103  }
104  this->self().beginTag(tag);
105  this->self().attribute("encoding", encoding);
106  Saver<string> saver;
107  saver(this->self(), tmp, false);
108  this->self().endTag(tag);
109 }
110 
113 
115 
117 {
118  auto it = idMap.find(id);
119  return it != end(idMap) ? it->second : nullptr;
120 }
121 
122 void InputArchiveBase2::addPointer(unsigned id, const void* p)
123 {
124  assert(idMap.find(id) == end(idMap));
125  idMap[id] = const_cast<void*>(p);
126 }
127 
128 unsigned InputArchiveBase2::getId(const void* ptr) const
129 {
130  for (const auto& p : idMap) {
131  if (p.second == ptr) return p.first;
132  }
133  return 0;
134 }
135 
136 template<typename Derived>
138  const char* tag, void* data, size_t len)
139 {
140  this->self().beginTag(tag);
141  string encoding;
142  this->self().attribute("encoding", encoding);
143 
144  string_ref tmp = this->self().loadStr();
145  this->self().endTag(tag);
146 
147  if (encoding == "gz-base64") {
148  auto p = Base64::decode(tmp);
149  auto dstLen = uLongf(len); // TODO check for overflow?
150  if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
151  reinterpret_cast<const Bytef*>(p.first.data()), uLong(p.second))
152  != Z_OK) ||
153  (dstLen != len)) {
154  throw MSXException("Error while decompressing blob.");
155  }
156  } else if ((encoding == "hex") || (encoding == "base64")) {
157  bool ok = (encoding == "hex")
158  ? HexDump::decode_inplace(tmp, static_cast<uint8_t*>(data), len)
159  : Base64 ::decode_inplace(tmp, static_cast<uint8_t*>(data), len);
160  if (!ok) {
162  << "Length of decoded blob different from "
163  "expected value (" << len << ')');
164  }
165  } else {
166  throw XMLException("Unsupported encoding \"" + encoding + "\" for blob");
167  }
168 }
169 
170 template class InputArchiveBase<MemInputArchive>;
171 template class InputArchiveBase<XmlInputArchive>;
172 
174 
175 void MemOutputArchive::save(const std::string& s)
176 {
177  auto size = s.size();
178  byte* buf = buffer.allocate(sizeof(size) + size);
179  memcpy(buf, &size, sizeof(size));
180  memcpy(buf + sizeof(size), s.data(), size);
181 }
182 
184 {
185  return buffer.release(size);
186 }
187 
189 
190 void MemInputArchive::load(std::string& s)
191 {
192  size_t length;
193  load(length);
194  s.resize(length);
195  if (length) {
196  get(&s[0], length);
197  }
198 }
199 
201 {
202  size_t length;
203  load(length);
204  const byte* p = buffer.getCurrentPos();
205  buffer.skip(length);
206  return string_ref(reinterpret_cast<const char*>(p), length);
207 }
208 
210 
211 // Too small inputs don't compress very well (often the compressed size is even
212 // bigger than the input). It also takes a relatively long time (because snappy
213 // has a relatively large setup time). I choose this value semi-arbitrary. I
214 // only made it >= 52 so that the (incompressible) RP5C01 registers won't be
215 // compressed.
216 static const size_t SMALL_SIZE = 100;
217 void MemOutputArchive::serialize_blob(const char*, const void* data, size_t len)
218 {
219  // Compress in-memory blobs:
220  //
221  // This is a bit slower than memcpy, but it uses a lot less memory.
222  // Memory usage is important for the reverse feature, where we keep a
223  // lot of savestates in memory.
224  //
225  // I compared 'gzip level=1' (fastest version with lowest compression
226  // ratio) with 'lzo'. lzo was considerably faster. Compression ratio
227  // was about the same (maybe lzo was slightly better (OTOH on higher
228  // levels gzip compresses better)). So I decided to go with lzo.
229  //
230  // Later I compared 'lzo' with 'snappy', lzo compresses 6-25% better,
231  // but 'snappy' is about twice as fast. So I switched to 'snappy'.
232  if (len >= SMALL_SIZE) {
233  size_t dstLen = snappy::maxCompressedLength(len);
234  byte* buf = buffer.allocate(sizeof(dstLen) + dstLen);
235  snappy::compress(static_cast<const char*>(data), len,
236  reinterpret_cast<char*>(&buf[sizeof(dstLen)]), dstLen);
237  memcpy(buf, &dstLen, sizeof(dstLen)); // fill-in actual size
238  buffer.deallocate(&buf[sizeof(dstLen) + dstLen]); // dealloc unused portion
239  } else {
240  byte* buf = buffer.allocate(len);
241  memcpy(buf, data, len);
242  }
243 
244 }
245 
246 void MemInputArchive::serialize_blob(const char*, void* data, size_t len)
247 {
248  if (len >= SMALL_SIZE) {
249  size_t srcLen; load(srcLen);
250  snappy::uncompress(reinterpret_cast<const char*>(buffer.getCurrentPos()),
251  srcLen, reinterpret_cast<char*>(data), len);
252  buffer.skip(srcLen);
253  } else {
254  memcpy(data, buffer.getCurrentPos(), len);
255  buffer.skip(len);
256  }
257 }
258 
260 
261 XmlOutputArchive::XmlOutputArchive(const string& filename)
262  : root("serial")
263 {
264  root.addAttribute("openmsx_version", Version::full());
265  root.addAttribute("date_time", Date::toString(time(nullptr)));
266  root.addAttribute("platform", TARGET_PLATFORM);
267  {
268  auto f = FileOperations::openFile(filename, "wb");
269  if (!f) goto error;
270  int duped_fd = dup(fileno(f.get()));
271  if (duped_fd == -1) goto error;
272  file = gzdopen(duped_fd, "wb9");
273  if (!file) {
274  close(duped_fd);
275  goto error;
276  }
277  current.push_back(&root);
278  return; // success
279  // on scope-exit 'File* f' is closed, and 'gzFile file'
280  // uses the dup()'ed file descriptor.
281  }
282 
283 error:
284  throw XMLException("Could not open compressed file \"" + filename + "\"");
285 }
286 
288 {
289  assert(current.back() == &root);
290  const char* header =
291  "<?xml version=\"1.0\" ?>\n"
292  "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
293  gzwrite(file, const_cast<char*>(header), unsigned(strlen(header)));
294  string dump = root.dump();
295  gzwrite(file, const_cast<char*>(dump.data()), unsigned(dump.size()));
296  gzclose(file);
297 }
298 
300 {
301  save(string(1, c));
302 }
303 void XmlOutputArchive::save(const string& str)
304 {
305  assert(!current.empty());
306  assert(current.back()->getData().empty());
307  current.back()->setData(str);
308 }
310 {
311  assert(!current.empty());
312  assert(current.back()->getData().empty());
313  current.back()->setData(b ? "true" : "false");
314 }
315 void XmlOutputArchive::save(unsigned char b)
316 {
317  save(unsigned(b));
318 }
319 void XmlOutputArchive::save(signed char c)
320 {
321  save(int(c));
322 }
324 {
325  save(int(c));
326 }
328 {
329  saveImpl(i);
330 }
331 void XmlOutputArchive::save(unsigned u)
332 {
333  saveImpl(u);
334 }
335 void XmlOutputArchive::save(unsigned long long ull)
336 {
337  saveImpl(ull);
338 }
339 
340 void XmlOutputArchive::attribute(const char* name, const string& str)
341 {
342  assert(!current.empty());
343  assert(!current.back()->hasAttribute(name));
344  current.back()->addAttribute(name, str);
345 }
346 void XmlOutputArchive::attribute(const char* name, int i)
347 {
348  attributeImpl(name, i);
349 }
350 void XmlOutputArchive::attribute(const char* name, unsigned u)
351 {
352  attributeImpl(name, u);
353 }
354 
355 void XmlOutputArchive::beginTag(const char* tag)
356 {
357  assert(!current.empty());
358  auto& elem = current.back()->addChild(tag);
359  current.push_back(&elem);
360 }
361 void XmlOutputArchive::endTag(const char* tag)
362 {
363  assert(!current.empty());
364  assert(current.back()->getName() == tag); (void)tag;
365  current.pop_back();
366 }
367 
369 
// Loads 'filename' through XMLLoader::load(), passing
// "openmsx-serialize.dtd" as second argument, and keeps the result as the
// root element. The element stack 'elems' starts with that root element,
// paired with the value 0.
// NOTE(review): the second pair member is later passed to findNextChild(),
// so it presumably is a search position - confirm in the header.
370 XmlInputArchive::XmlInputArchive(const string& filename)
371  : rootElem(XMLLoader::load(filename, "openmsx-serialize.dtd"))
372 {
373  elems.emplace_back(&rootElem, 0);
374 }
375 
377 {
378  if (!elems.back().first->getChildren().empty()) {
379  throw XMLException("No child tags expected for primitive type");
380  }
381  return elems.back().first->getData();
382 }
383 void XmlInputArchive::load(string& t)
384 {
385  t = loadStr().str();
386 }
388 {
389  std::string str;
390  load(str);
391  std::istringstream is(str);
392  is >> c;
393 }
395 {
396  string_ref s = loadStr();
397  if ((s == "true") || (s == "1")) {
398  b = true;
399  } else if ((s == "false") || (s == "0")) {
400  b = false;
401  } else {
402  throw XMLException("Bad value found for boolean: " + s);
403  }
404 }
405 
406 // This function parses a number from a string. It's similar to the generic
407 // templatized XmlInputArchive::load() method, but _much_ faster. It does
408 // have some limitations though:
409 // - it can't handle leading whitespace
410 // - it can't handle extra characters at the end of the string
411 // - it can only handle one base (only decimal, not octal or hexadecimal)
412 // - it doesn't understand a leading '+' sign
413 // - it doesn't detect overflow or underflow (The generic implementation sets
414 // a 'bad' flag on the stream and clips the result to the min/max allowed
415 // value. Though this 'bad' flag was ignored by the openMSX code).
416 // This routine is only used to parse strings we've written ourselves (and the
417 // savestate/replay XML files are not meant to be manually edited). So the
418 // above limitations don't really matter. And we can use the speed gain.
// Helper functor: negates a value on request, but only has a real
// implementation for signed types. Selected via the IS_SIGNED parameter.
template<bool IS_SIGNED> struct ConditionalNegate;

// Signed variant: replaces 't' by '-t' when 'negate' is true.
template<> struct ConditionalNegate<true> {
    template<typename T> void operator()(bool negate, T& t) {
        if (!negate) return;
        t = -t; // ok to negate a signed type
    }
};

// Unsigned variant: never modifies the value; asking for a negation is a
// programming error (checked by assert).
template<> struct ConditionalNegate<false> {
    template<typename T> void operator()(bool negate, T& /*t*/) {
        (void)negate; // keeps 'negate' used when assert() compiles away
        assert(!negate); // can't negate unsigned type
    }
};
430 template<typename T> static inline void fastAtoi(string_ref str, T& t)
431 {
432  t = 0;
433  bool neg = false;
434  size_t i = 0;
435  size_t l = str.size();
436 
437  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
438  if (IS_SIGNED) {
439  if (l == 0) return;
440  if (str[0] == '-') {
441  neg = true;
442  i = 1;
443  }
444  }
445  for (; i < l; ++i) {
446  unsigned d = str[i] - '0';
447  if (unlikely(d > 9)) {
448  throw XMLException("Invalid integer: " + str);
449  }
450  t = 10 * t + d;
451  }
452  // The following stuff does the equivalent of:
453  // if (neg) t = -t;
454  // Though this expression triggers a warning on VC++ when T is an
455  // unsigned type. This complex template stuff avoids the warning.
456  ConditionalNegate<IS_SIGNED> negateFunctor;
457  negateFunctor(neg, t);
458 }
460 {
461  string_ref str = loadStr();
462  fastAtoi(str, i);
463 }
464 void XmlInputArchive::load(unsigned& u)
465 {
466  string_ref str = loadStr();
467  fastAtoi(str, u);
468 }
469 void XmlInputArchive::load(unsigned long long& ull)
470 {
471  string_ref str = loadStr();
472  fastAtoi(str, ull);
473 }
474 void XmlInputArchive::load(unsigned char& b)
475 {
476  unsigned i;
477  load(i);
478  b = i;
479 }
480 void XmlInputArchive::load(signed char& c)
481 {
482  int i;
483  load(i);
484  c = i;
485 }
487 {
488  int i;
489  load(i);
490  c = i;
491 }
492 
493 void XmlInputArchive::beginTag(const char* tag)
494 {
495  auto* child = elems.back().first->findNextChild(
496  tag, elems.back().second);
497  if (!child) {
498  string path;
499  for (auto& e : elems) {
500  path += e.first->getName() + '/';
501  }
503  "No child tag \"" << tag <<
504  "\" found at location \"" << path << '\"');
505  }
506  elems.emplace_back(child, 0);
507 }
508 void XmlInputArchive::endTag(const char* tag)
509 {
510  const auto& elem = *elems.back().first;
511  if (elem.getName() != tag) {
512  throw XMLException("End tag \"" + elem.getName() +
513  "\" not equal to begin tag \"" + tag + "\"");
514  }
515  auto& elem2 = const_cast<XMLElement&>(elem);
516  elem2.clearName(); // mark this elem for later beginTag() calls
517  elems.pop_back();
518 }
519 
520 void XmlInputArchive::attribute(const char* name, string& t)
521 {
522  try {
523  t = elems.back().first->getAttribute(name);
524  } catch (ConfigException& ex) {
525  throw XMLException(ex.getMessage());
526  }
527 }
528 void XmlInputArchive::attribute(const char* name, int& i)
529 {
530  attributeImpl(name, i);
531 }
532 void XmlInputArchive::attribute(const char* name, unsigned& u)
533 {
534  attributeImpl(name, u);
535 }
536 bool XmlInputArchive::hasAttribute(const char* name)
537 {
538  return elems.back().first->hasAttribute(name);
539 }
540 bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
541 {
542  return elems.back().first->findAttributeInt(name, value);
543 }
545 {
546  return int(elems.back().first->getChildren().size());
547 }
548 
549 } // namespace openmsx
void attribute(const char *name, T &t)
Definition: serialize.hh:800
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:322
size_t maxCompressedLength(size_t inLen)
Definition: snappy.cc:616
bool findAttribute(const char *name, unsigned &value)
Definition: serialize.cc:540
void serialize_blob(const char *tag, void *data, size_t len)
Definition: serialize.cc:137
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:167
void operator()(bool negate, T &t)
Definition: serialize.cc:421
std::string str() const
Definition: string_ref.cc:12
#define unlikely(x)
Definition: likely.hh:15
void loadChar(char &c)
Definition: serialize.cc:387
XmlInputArchive(const std::string &filename)
Definition: serialize.cc:370
void * getPointer(unsigned id)
Definition: serialize.cc:116
void save(const T &t)
Definition: serialize.hh:601
unsigned getId(const void *p) const
Definition: serialize.cc:128
bool decode_inplace(string_ref input, uint8_t *output, size_t outSize)
Definition: HexDump.cc:74
uint8_t byte
8 bit unsigned integer
Definition: openmsx.hh:26
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
void endTag(const char *tag)
Definition: serialize.cc:508
void serialize_blob(const char *, void *data, size_t len)
Definition: serialize.cc:246
std::string dump() const
Definition: XMLElement.cc:281
void addPointer(unsigned id, const void *p)
Definition: serialize.cc:122
bool decode_inplace(string_ref input, uint8_t *output, size_t outSize)
Definition: Base64.cc:125
MemBuffer< byte > releaseBuffer(size_t &size)
Definition: serialize.cc:183
size_type size() const
Definition: string_ref.hh:55
XMLElement load(string_ref filename, string_ref systemID)
Definition: XMLLoader.cc:31
bool hasAttribute(const char *name)
Definition: serialize.cc:536
void compress(const char *input, size_t inLen, char *output, size_t &outLen)
Definition: snappy.cc:603
void attribute(const char *name, const T &t)
Definition: serialize.hh:737
void endTag(const char *tag)
Definition: serialize.cc:361
void attributeImpl(const char *name, const T &t)
Definition: serialize.hh:733
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition: serialize.hh:205
void serialize_blob(const char *, const void *data, size_t len)
Definition: serialize.cc:217
void save(const T &t)
Definition: serialize.hh:708
const std::string & getMessage() const
Definition: MSXException.hh:14
void uncompress(const char *input, size_t inLen, char *output, size_t outLen)
Definition: snappy.cc:166
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
void operator()(bool negate, T &)
Definition: serialize.cc:426
void beginTag(const char *tag)
Definition: serialize.cc:493
int countChildren() const
Definition: serialize.cc:544
uint8_t * data()
void addAttribute(string_ref name, string_ref value)
Definition: XMLElement.cc:57
size_t size() const
static std::string full()
Definition: Version.cc:7
FILE_t openFile(const std::string &filename, const std::string &mode)
Call fopen() in a platform-independent manner.
void saveImpl(const T &t)
Definition: serialize.hh:702
XmlOutputArchive(const std::string &filename)
Definition: serialize.cc:261
void beginTag(const char *tag)
Definition: serialize.cc:355
void serialize_blob(const char *tag, const void *data, size_t len)
Definition: serialize.cc:77
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:90
void attributeImpl(const char *name, T &t)
Definition: serialize.hh:793