openMSX
ZMBVEncoder.cc
Go to the documentation of this file.
1 // Code based on DOSBox-0.65
2 
3 #include "ZMBVEncoder.hh"
4 #include "FrameSource.hh"
5 #include "PixelOperations.hh"
6 #include "cstd.hh"
7 #include "endian.hh"
8 #include "ranges.hh"
9 #include "unreachable.hh"
10 #include <array>
11 #include <cassert>
12 #include <cstdint>
13 #include <cstdlib>
14 #include <cstring>
15 #include <cmath>
16 #include <tuple>
17 
18 namespace openmsx {
19 
// Version number written into every keyframe header.
constexpr uint8_t DBZV_VERSION_HIGH{0};
constexpr uint8_t DBZV_VERSION_LOW{1};
// Compression tag written into every keyframe header.
constexpr uint8_t COMPRESSION_ZLIB{1};
// Largest motion-vector component. The frame buffers are padded by this many
// pixels on every side, and it also fixes the block dimensions.
constexpr unsigned MAX_VECTOR{16};
constexpr unsigned BLOCK_WIDTH{MAX_VECTOR};
constexpr unsigned BLOCK_HEIGHT{MAX_VECTOR};
// Bit set in the first output byte of a frame when that frame is a keyframe.
constexpr unsigned FLAG_KEYFRAME{0x01};
27 
/// A candidate motion vector: a signed displacement in pixels.
struct CodecVector {
    std::int8_t x; ///< horizontal displacement
    std::int8_t y; ///< vertical displacement
};
32 
33 constexpr unsigned VECTOR_TAB_SIZE =
34  1 + // center
35  8 * MAX_VECTOR + // horizontal, vertical, diagonal
36  MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
37 
38 constexpr auto vectorTable = [] {
39  std::array<CodecVector, VECTOR_TAB_SIZE> result = {};
40 
41  unsigned p = 0;
42  // center
43  result[p] = {0, 0};
44  p += 1;
45  // horizontal, vertical, diagonal
46  for (int i = 1; i <= int(MAX_VECTOR); ++i, p += 8) {
47  result[p + 0] = {int8_t( i), int8_t( 0)};
48  result[p + 1] = {int8_t(-i), int8_t( 0)};
49  result[p + 2] = {int8_t( 0), int8_t( i)};
50  result[p + 3] = {int8_t( 0), int8_t(-i)};
51  result[p + 4] = {int8_t( i), int8_t( i)};
52  result[p + 5] = {int8_t(-i), int8_t( i)};
53  result[p + 6] = {int8_t( i), int8_t(-i)};
54  result[p + 7] = {int8_t(-i), int8_t(-i)};
55  }
56  // rest
57  for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
58  for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
59  if (x == y) continue; // already have diagonal
60  result[p + 0] = {int8_t( x), int8_t( y)};
61  result[p + 1] = {int8_t(-x), int8_t( y)};
62  result[p + 2] = {int8_t( x), int8_t(-y)};
63  result[p + 3] = {int8_t(-x), int8_t(-y)};
64  p += 4;
65  }
66  }
67  assert(p == VECTOR_TAB_SIZE);
68 
69  // sort
70  auto compare = [](const CodecVector& l, const CodecVector& r) {
71  auto cost = [](const CodecVector& v) {
72  auto c = cstd::sqrt(double(v.x * v.x + v.y * v.y));
73  if ((v.x == 0) || (v.y == 0)) {
74  // no penalty for purely horizontal/vertical offset
75  c *= 1.0;
76  } else if (cstd::abs(v.x) == cstd::abs(v.y)) {
77  // small penalty for pure diagonal
78  c *= 2.0;
79  } else {
80  // bigger penalty for 'random' direction
81  c *= 4.0;
82  }
83  return c;
84  };
85  return std::tuple(cost(l), l.x, l.y) <
86  std::tuple(cost(r), r.x, r.y);
87  };
88  cstd::sort(result, compare);
89 
90  return result;
91 }();
92 
/// Fixed-size header describing the stream format. It consists solely of
/// single-byte fields so that it can be overlaid directly on a byte buffer.
struct KeyframeHeader {
    uint8_t high_version; ///< format version, high part
    uint8_t low_version;  ///< format version, low part
    uint8_t compression;  ///< compression type tag
    uint8_t format;       ///< pixel format tag
    uint8_t blockwidth;   ///< width of a motion block, in pixels
    uint8_t blockheight;  ///< height of a motion block, in pixels
};
// The struct is written straight into the output byte stream, so it must not
// contain any padding.
static_assert(sizeof(KeyframeHeader) == 6, "KeyframeHeader must be exactly 6 bytes");
101 
102 
103 static inline void writePixel(
104  const PixelOperations<uint16_t>& pixelOps,
105  uint16_t pixel, Endian::L16& dest)
106 {
107  unsigned r = pixelOps.red256(pixel);
108  unsigned g = pixelOps.green256(pixel);
109  unsigned b = pixelOps.blue256(pixel);
110  dest = ((r & 0xF8) << (11 - 3)) | ((g & 0xFC) << (5 - 2)) | (b >> 3);
111 }
112 
113 static inline void writePixel(
114  const PixelOperations<unsigned>& pixelOps,
115  unsigned pixel, Endian::L32& dest)
116 {
117  unsigned r = pixelOps.red256(pixel);
118  unsigned g = pixelOps.green256(pixel);
119  unsigned b = pixelOps.blue256(pixel);
120  dest = (r << 16) | (g << 8) | b;
121 }
122 
123 
124 ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_, unsigned bpp)
125  : width(width_)
126  , height(height_)
127 {
128  setupBuffers(bpp);
129  memset(&zstream, 0, sizeof(zstream));
130  deflateInit(&zstream, 6); // compression level
131 
132  // I did a small test: compression level vs compression speed
133  // (recorded Space Manbow intro, video only)
134  //
135  // level | time | size
136  // ------+--------+----------
137  // 0 | 1m12.6 | 139442594
138  // 1 | 1m12.1 | 5217288
139  // 2 | 1m10.8 | 4887258
140  // 3 | 1m11.8 | 4610668
141  // 4 | 1m13.1 | 3791932 <-- old default
142  // 5 | 1m14.2 | 3602078
143  // 6 | 1m14.5 | 3363766 <-- current default
144  // 7 | 1m15.8 | 3333938
145  // 8 | 1m25.0 | 3301168
146  // 9 | 2m04.1 | 3253706
147  //
148  // Level 6 seems a good compromise between size/speed for THIS test.
149 }
150 
151 void ZMBVEncoder::setupBuffers(unsigned bpp)
152 {
153  switch (bpp) {
154 #if HAVE_16BPP
155  case 15:
156  case 16:
157  format = ZMBV_FORMAT_16BPP;
158  pixelSize = 2;
159  break;
160 #endif
161 #if HAVE_32BPP
162  case 32:
163  format = ZMBV_FORMAT_32BPP;
164  pixelSize = 4;
165  break;
166 #endif
167  default:
168  UNREACHABLE;
169  }
170 
171  pitch = width + 2 * MAX_VECTOR;
172  unsigned bufsize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
173 
174  oldframe.resize(bufsize);
175  newframe.resize(bufsize);
176  memset(oldframe.data(), 0, bufsize);
177  memset(newframe.data(), 0, bufsize);
178  work.resize(bufsize);
179  outputSize = neededSize();
180  output.resize(outputSize);
181 
182  assert((width % BLOCK_WIDTH ) == 0);
183  assert((height % BLOCK_HEIGHT) == 0);
184  unsigned xBlocks = width / BLOCK_WIDTH;
185  unsigned yBlocks = height / BLOCK_HEIGHT;
186  blockOffsets.resize(xBlocks * yBlocks);
187  for (auto y : xrange(yBlocks)) {
188  for (auto x : xrange(xBlocks)) {
189  blockOffsets[y * xBlocks + x] =
190  ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
191  (x * BLOCK_WIDTH) + MAX_VECTOR;
192  }
193  }
194 }
195 
196 unsigned ZMBVEncoder::neededSize() const
197 {
198  unsigned f = pixelSize;
199  f = f * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
200  return f + f / 1000;
201 }
202 
203 template<typename P>
204 unsigned ZMBVEncoder::possibleBlock(int vx, int vy, unsigned offset)
205 {
206  int ret = 0;
207  auto* pOld = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
208  auto* pNew = &(reinterpret_cast<P*>(newframe.data()))[offset];
209  for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
210  for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
211  if (pOld[x] != pNew[x]) ++ret;
212  }
213  pOld += pitch * 4;
214  pNew += pitch * 4;
215  }
216  return ret;
217 }
218 
219 template<typename P>
220 unsigned ZMBVEncoder::compareBlock(int vx, int vy, unsigned offset)
221 {
222  int ret = 0;
223  auto* pOld = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
224  auto* pNew = &(reinterpret_cast<P*>(newframe.data()))[offset];
225  repeat(BLOCK_HEIGHT, [&] {
226  for (auto x : xrange(BLOCK_WIDTH)) {
227  if (pOld[x] != pNew[x]) ++ret;
228  }
229  pOld += pitch;
230  pNew += pitch;
231  });
232  return ret;
233 }
234 
235 template<typename P>
236 void ZMBVEncoder::addXorBlock(
237  const PixelOperations<P>& pixelOps, int vx, int vy, unsigned offset, unsigned& workUsed)
238 {
239  using LE_P = typename Endian::Little<P>::type;
240 
241  auto* pOld = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
242  auto* pNew = &(reinterpret_cast<P*>(newframe.data()))[offset];
243  repeat(BLOCK_HEIGHT, [&] {
244  for (auto x : xrange(BLOCK_WIDTH)) {
245  P pXor = pNew[x] ^ pOld[x];
246  writePixel(pixelOps, pXor, *reinterpret_cast<LE_P*>(&work[workUsed]));
247  workUsed += sizeof(P);
248  }
249  pOld += pitch;
250  pNew += pitch;
251  });
252 }
253 
254 template<typename P>
255 void ZMBVEncoder::addXorFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
256 {
257  PixelOperations<P> pixelOps(pixelFormat);
258  auto* vectors = reinterpret_cast<int8_t*>(&work[workUsed]);
259 
260  unsigned xBlocks = width / BLOCK_WIDTH;
261  unsigned yBlocks = height / BLOCK_HEIGHT;
262  unsigned blockcount = xBlocks * yBlocks;
263 
264  // Align the following xor data on 4 byte boundary
265  workUsed = (workUsed + blockcount * 2 + 3) & ~3;
266 
267  int bestVx = 0;
268  int bestVy = 0;
269  for (auto b : xrange(blockcount)) {
270  unsigned offset = blockOffsets[b];
271  // first try best vector of previous block
272  unsigned bestchange = compareBlock<P>(bestVx, bestVy, offset);
273  if (bestchange >= 4) {
274  int possibles = 64;
275  for (const auto& v : vectorTable) {
276  if (possibleBlock<P>(v.x, v.y, offset) < 4) {
277  unsigned testchange = compareBlock<P>(v.x, v.y, offset);
278  if (testchange < bestchange) {
279  bestchange = testchange;
280  bestVx = v.x;
281  bestVy = v.y;
282  if (bestchange < 4) break;
283  }
284  --possibles;
285  if (possibles == 0) break;
286  }
287  }
288  }
289  vectors[b * 2 + 0] = (bestVx << 1);
290  vectors[b * 2 + 1] = (bestVy << 1);
291  if (bestchange) {
292  vectors[b * 2 + 0] |= 1;
293  addXorBlock<P>(pixelOps, bestVx, bestVy, offset, workUsed);
294  }
295  }
296 }
297 
298 template<typename P>
299 void ZMBVEncoder::addFullFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
300 {
301  using LE_P = typename Endian::Little<P>::type;
302 
303  PixelOperations<P> pixelOps(pixelFormat);
304  auto* readFrame =
305  &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
306  repeat(height, [&] {
307  auto* pixelsIn = reinterpret_cast<P*> (readFrame);
308  auto* pixelsOut = reinterpret_cast<LE_P*>(&work[workUsed]);
309  for (auto x : xrange(width)) {
310  writePixel(pixelOps, pixelsIn[x], pixelsOut[x]);
311  }
312  readFrame += pitch * sizeof(P);
313  workUsed += width * sizeof(P);
314  });
315 }
316 
317 const void* ZMBVEncoder::getScaledLine(FrameSource* frame, unsigned y, void* workBuf_) const
318 {
319 #if HAVE_32BPP
320  if (pixelSize == 4) { // 32bpp
321  auto* workBuf = static_cast<uint32_t*>(workBuf_);
322  switch (height) {
323  case 240:
324  return frame->getLinePtr320_240(y, workBuf);
325  case 480:
326  return frame->getLinePtr640_480(y, workBuf);
327  case 720:
328  return frame->getLinePtr960_720(y, workBuf);
329  default:
330  UNREACHABLE;
331  }
332  }
333 #endif
334 #if HAVE_16BPP
335  if (pixelSize == 2) { // 15bpp or 16bpp
336  auto* workBuf = static_cast<uint16_t*>(workBuf_);
337  switch (height) {
338  case 240:
339  return frame->getLinePtr320_240(y, workBuf);
340  case 480:
341  return frame->getLinePtr640_480(y, workBuf);
342  case 720:
343  return frame->getLinePtr960_720(y, workBuf);
344  default:
345  UNREACHABLE;
346  }
347  }
348 #endif
349  UNREACHABLE;
350  return nullptr; // avoid warning
351 }
352 
354 {
355  std::swap(newframe, oldframe); // replace oldframe with newframe
356 
357  // Reset the work buffer
358  unsigned workUsed = 0;
359  unsigned writeDone = 1;
360  uint8_t* writeBuf = output.data();
361 
362  output[0] = 0; // first byte contains info about this frame
363  if (keyFrame) {
364  output[0] |= FLAG_KEYFRAME;
365  auto* header = reinterpret_cast<KeyframeHeader*>(
366  writeBuf + writeDone);
368  header->low_version = DBZV_VERSION_LOW;
369  header->compression = COMPRESSION_ZLIB;
370  header->format = format;
371  header->blockwidth = BLOCK_WIDTH;
372  header->blockheight = BLOCK_HEIGHT;
373  writeDone += sizeof(KeyframeHeader);
374  deflateReset(&zstream); // restart deflate
375  }
376 
377  // copy lines (to add black border)
378  unsigned linePitch = pitch * pixelSize;
379  unsigned lineWidth = width * pixelSize;
380  uint8_t* dest =
381  &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
382  for (auto i : xrange(height)) {
383  const auto* scaled = getScaledLine(frame, i, dest);
384  if (scaled != dest) memcpy(dest, scaled, lineWidth);
385  dest += linePitch;
386  }
387 
388  // Add the frame data.
389  if (keyFrame) {
390  // Key frame: full frame data.
391  switch (pixelSize) {
392 #if HAVE_16BPP
393  case 2:
394  addFullFrame<uint16_t>(frame->getPixelFormat(), workUsed);
395  break;
396 #endif
397 #if HAVE_32BPP
398  case 4:
399  addFullFrame<uint32_t>(frame->getPixelFormat(), workUsed);
400  break;
401 #endif
402  default:
403  UNREACHABLE;
404  }
405  } else {
406  // Non-key frame: delta frame data.
407  switch (pixelSize) {
408 #if HAVE_16BPP
409  case 2:
410  addXorFrame<uint16_t>(frame->getPixelFormat(), workUsed);
411  break;
412 #endif
413 #if HAVE_32BPP
414  case 4:
415  addXorFrame<uint32_t>(frame->getPixelFormat(), workUsed);
416  break;
417 #endif
418  default:
419  UNREACHABLE;
420  }
421  }
422  // Compress the frame data with zlib.
423  zstream.next_in = work.data();
424  zstream.avail_in = workUsed;
425  zstream.total_in = 0;
426 
427  zstream.next_out = static_cast<Bytef*>(writeBuf + writeDone);
428  zstream.avail_out = outputSize - writeDone;
429  zstream.total_out = 0;
430  auto r = deflate(&zstream, Z_SYNC_FLUSH);
431  assert(r == Z_OK); (void)r;
432 
433  return {output.data(), writeDone + zstream.total_out};
434 }
435 
436 } // namespace openmsx
openmsx::BLOCK_HEIGHT
constexpr unsigned BLOCK_HEIGHT
Definition: ZMBVEncoder.cc:25
openmsx::CodecVector
Definition: ZMBVEncoder.cc:28
cstd::sort
constexpr void sort(RAIt first, RAIt last, Compare cmp=Compare{})
Definition: cstd.hh:64
openmsx::ZMBVEncoder::ZMBVEncoder
ZMBVEncoder(unsigned width, unsigned height, unsigned bpp)
Definition: ZMBVEncoder.cc:124
openmsx::PixelOperations< uint16_t >
openmsx::KeyframeHeader::blockwidth
uint8_t blockwidth
Definition: ZMBVEncoder.cc:98
xrange
constexpr auto xrange(T e)
Definition: xrange.hh:155
FrameSource.hh
openmsx::PixelOperations::blue256
unsigned blue256(Pixel p) const
Definition: PixelOperations.hh:332
cstd.hh
openmsx::vectorTable
constexpr auto vectorTable
Definition: ZMBVEncoder.cc:38
openmsx::CodecVector::y
int8_t y
Definition: ZMBVEncoder.cc:30
openmsx::KeyframeHeader::compression
uint8_t compression
Definition: ZMBVEncoder.cc:96
ZMBVEncoder.hh
ranges.hh
openmsx::COMPRESSION_ZLIB
constexpr uint8_t COMPRESSION_ZLIB
Definition: ZMBVEncoder.cc:22
openmsx::FLAG_KEYFRAME
constexpr unsigned FLAG_KEYFRAME
Definition: ZMBVEncoder.cc:26
repeat
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
Definition: xrange.hh:170
openmsx::ZMBVEncoder::compressFrame
span< const uint8_t > compressFrame(bool keyFrame, FrameSource *frame)
Definition: ZMBVEncoder.cc:353
span< const uint8_t >
openmsx::DBZV_VERSION_LOW
constexpr uint8_t DBZV_VERSION_LOW
Definition: ZMBVEncoder.cc:21
UNREACHABLE
#define UNREACHABLE
Definition: unreachable.hh:38
openmsx::MemBuffer::resize
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:111
openmsx::KeyframeHeader::high_version
uint8_t high_version
Definition: ZMBVEncoder.cc:94
openmsx::CodecVector::x
int8_t x
Definition: ZMBVEncoder.cc:29
openmsx::writePixel
Pixel writePixel(uint32_t p)
Definition: HQCommon.hh:32
cstd::sqrt
constexpr double sqrt(double x)
Definition: cstd.hh:367
openmsx::KeyframeHeader::blockheight
uint8_t blockheight
Definition: ZMBVEncoder.cc:99
openmsx::VECTOR_TAB_SIZE
constexpr unsigned VECTOR_TAB_SIZE
Definition: ZMBVEncoder.cc:33
openmsx::PixelOperations::green256
unsigned green256(Pixel p) const
Definition: PixelOperations.hh:323
openmsx::FrameSource::getPixelFormat
const PixelFormat & getPixelFormat() const
Definition: FrameSource.hh:190
openmsx::MAX_VECTOR
constexpr unsigned MAX_VECTOR
Definition: ZMBVEncoder.cc:23
cstd::abs
constexpr T abs(T t)
Definition: cstd.hh:125
endian.hh
Endian::EndianT
Definition: endian.hh:71
g
int g
Definition: ScopedAssign_test.cc:20
PixelOperations.hh
openmsx::KeyframeHeader::format
uint8_t format
Definition: ZMBVEncoder.cc:97
openmsx::DBZV_VERSION_HIGH
constexpr uint8_t DBZV_VERSION_HIGH
Definition: ZMBVEncoder.cc:20
openmsx::x
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:1414
Endian::Little
Definition: endian.hh:286
openmsx::FrameSource
Interface for getting lines from a video frame.
Definition: FrameSource.hh:15
openmsx::PixelOperations::red256
unsigned red256(Pixel p) const
Same as above, but result is scaled to [0..255].
Definition: PixelOperations.hh:314
openmsx::MemBuffer::data
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
openmsx::BLOCK_WIDTH
constexpr unsigned BLOCK_WIDTH
Definition: ZMBVEncoder.cc:24
unreachable.hh
openmsx::KeyframeHeader::low_version
uint8_t low_version
Definition: ZMBVEncoder.cc:95
openmsx
This file implemented 3 utility functions:
Definition: Autofire.cc:5
openmsx::KeyframeHeader
Definition: ZMBVEncoder.cc:93