openMSX
ZMBVEncoder.cc
Go to the documentation of this file.
1 // Code based on DOSBox-0.65
2 
3 #include "ZMBVEncoder.hh"
4 #include "FrameSource.hh"
5 #include "PixelOperations.hh"
6 #include "endian.hh"
7 #include "ranges.hh"
8 #include "unreachable.hh"
9 #include <cassert>
10 #include <cstdlib>
11 #include <cstring>
12 #include <cmath>
13 
14 namespace openmsx {
15 
// Version numbers written into every keyframe header.
static constexpr uint8_t DBZV_VERSION_HIGH = 0;
static constexpr uint8_t DBZV_VERSION_LOW = 1;
// Compression identifier written into every keyframe header.
static constexpr uint8_t COMPRESSION_ZLIB = 1;
// Largest motion-vector component; also the block size and the width of the
// border that is added around the frame buffers.
static constexpr unsigned MAX_VECTOR = 16;
static constexpr unsigned BLOCK_WIDTH = MAX_VECTOR;
static constexpr unsigned BLOCK_HEIGHT = MAX_VECTOR;
// Bit set in the first output byte of a frame when it is a keyframe.
static constexpr unsigned FLAG_KEYFRAME = 0x01;
23 
/** A candidate motion vector (x, y) together with its search cost. */
struct CodecVector {
	/** Euclidean length of the vector, scaled by a direction penalty:
	  * x1 for purely horizontal/vertical, x2 for pure diagonal,
	  * x4 for any other direction.
	  */
	float cost() const {
		const float length = sqrtf(float(x * x + y * y));
		// no penalty for purely horizontal/vertical offset
		if ((x == 0) || (y == 0)) return length * 1.0f;
		// small penalty for pure diagonal
		if (abs(x) == abs(y)) return length * 2.0f;
		// bigger penalty for 'random' direction
		return length * 4.0f;
	}
	int x;
	int y;
};
42 static inline bool operator<(const CodecVector& l, const CodecVector& r)
43 {
44  return l.cost() < r.cost();
45 }
46 
47 static const unsigned VECTOR_TAB_SIZE =
48  1 + // center
49  8 * MAX_VECTOR + // horizontal, vertial, diagonal
50  MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
51 CodecVector vectorTable[VECTOR_TAB_SIZE];
52 
/** Header that compressFrame() writes directly after the flag byte of every
  * keyframe. All fields are single bytes, so the struct has no padding.
  */
struct KeyframeHeader {
	uint8_t high_version; // DBZV_VERSION_HIGH
	uint8_t low_version;  // DBZV_VERSION_LOW
	uint8_t compression;  // COMPRESSION_ZLIB
	uint8_t format;       // pixel format selected in setupBuffers()
	uint8_t blockwidth;   // BLOCK_WIDTH
	uint8_t blockheight;  // BLOCK_HEIGHT
};
61 
62 const char* ZMBVEncoder::CODEC_4CC = "ZMBV"; // four-character code naming this codec
63 
64 
65 static inline void writePixel(
66  const PixelOperations<uint16_t>& pixelOps,
67  uint16_t pixel, Endian::L16& dest)
68 {
69  unsigned r = pixelOps.red256(pixel);
70  unsigned g = pixelOps.green256(pixel);
71  unsigned b = pixelOps.blue256(pixel);
72  dest = ((r & 0xF8) << (11 - 3)) | ((g & 0xFC) << (5 - 2)) | (b >> 3);
73 }
74 
75 static inline void writePixel(
76  const PixelOperations<unsigned>& pixelOps,
77  unsigned pixel, Endian::L32& dest)
78 {
79  unsigned r = pixelOps.red256(pixel);
80  unsigned g = pixelOps.green256(pixel);
81  unsigned b = pixelOps.blue256(pixel);
82  dest = (r << 16) | (g << 8) | b;
83 }
84 
85 static void createVectorTable()
86 {
87  unsigned p = 0;
88  // center
89  vectorTable[p] = {0, 0};
90  p += 1;
91  // horizontal, vertial, diagonal
92  for (int i = 1; i <= int(MAX_VECTOR); ++i) {
93  vectorTable[p + 0] = { i, 0};
94  vectorTable[p + 1] = {-i, 0};
95  vectorTable[p + 2] = { 0, i};
96  vectorTable[p + 3] = { 0,-i};
97  vectorTable[p + 4] = { i, i};
98  vectorTable[p + 5] = {-i, i};
99  vectorTable[p + 6] = { i,-i};
100  vectorTable[p + 7] = {-i,-i};
101  p += 8;
102  }
103  // rest
104  for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
105  for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
106  if (x == y) continue; // already have diagonal
107  vectorTable[p + 0] = { x, y};
108  vectorTable[p + 1] = {-x, y};
109  vectorTable[p + 2] = { x,-y};
110  vectorTable[p + 3] = {-x,-y};
111  p += 4;
112  }
113  }
114  assert(p == VECTOR_TAB_SIZE);
115 
116  ranges::sort(vectorTable);
117 }
118 
119 ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_, unsigned bpp)
120  : width(width_)
121  , height(height_)
122 {
123  setupBuffers(bpp);
124  createVectorTable();
125  memset(&zstream, 0, sizeof(zstream));
126  deflateInit(&zstream, 6); // compression level
127 
128  // I did a small test: compression level vs compression speed
129  // (recorded Space Manbow intro, video only)
130  //
131  // level | time | size
132  // ------+--------+----------
133  // 0 | 1m12.6 | 139442594
134  // 1 | 1m12.1 | 5217288
135  // 2 | 1m10.8 | 4887258
136  // 3 | 1m11.8 | 4610668
137  // 4 | 1m13.1 | 3791932 <-- old default
138  // 5 | 1m14.2 | 3602078
139  // 6 | 1m14.5 | 3363766 <-- current default
140  // 7 | 1m15.8 | 3333938
141  // 8 | 1m25.0 | 3301168
142  // 9 | 2m04.1 | 3253706
143  //
144  // Level 6 seems a good compromise between size/speed for THIS test.
145 }
146 
147 void ZMBVEncoder::setupBuffers(unsigned bpp)
148 {
149  switch (bpp) {
150 #if HAVE_16BPP
151  case 15:
152  case 16:
153  format = ZMBV_FORMAT_16BPP;
154  pixelSize = 2;
155  break;
156 #endif
157 #if HAVE_32BPP
158  case 32:
159  format = ZMBV_FORMAT_32BPP;
160  pixelSize = 4;
161  break;
162 #endif
163  default:
164  UNREACHABLE;
165  }
166 
167  pitch = width + 2 * MAX_VECTOR;
168  unsigned bufsize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
169 
170  oldframe.resize(bufsize);
171  newframe.resize(bufsize);
172  memset(oldframe.data(), 0, bufsize);
173  memset(newframe.data(), 0, bufsize);
174  work.resize(bufsize);
175  outputSize = neededSize();
176  output.resize(outputSize);
177 
178  assert((width % BLOCK_WIDTH ) == 0);
179  assert((height % BLOCK_HEIGHT) == 0);
180  unsigned xblocks = width / BLOCK_WIDTH;
181  unsigned yblocks = height / BLOCK_HEIGHT;
182  blockOffsets.resize(xblocks * yblocks);
183  for (unsigned y = 0; y < yblocks; ++y) {
184  for (unsigned x = 0; x < xblocks; ++x) {
185  blockOffsets[y * xblocks + x] =
186  ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
187  (x * BLOCK_WIDTH) + MAX_VECTOR;
188  }
189  }
190 }
191 
192 unsigned ZMBVEncoder::neededSize()
193 {
194  unsigned f = pixelSize;
195  f = f * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
196  return f + f / 1000;
197 }
198 
199 template<class P>
200 unsigned ZMBVEncoder::possibleBlock(int vx, int vy, unsigned offset)
201 {
202  int ret = 0;
203  auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
204  auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
205  for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
206  for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
207  if (pold[x] != pnew[x]) ++ret;
208  }
209  pold += pitch * 4;
210  pnew += pitch * 4;
211  }
212  return ret;
213 }
214 
215 template<class P>
216 unsigned ZMBVEncoder::compareBlock(int vx, int vy, unsigned offset)
217 {
218  int ret = 0;
219  auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
220  auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
221  for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
222  for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
223  if (pold[x] != pnew[x]) ++ret;
224  }
225  pold += pitch;
226  pnew += pitch;
227  }
228  return ret;
229 }
230 
231 template<class P>
232 void ZMBVEncoder::addXorBlock(
233  const PixelOperations<P>& pixelOps, int vx, int vy, unsigned offset, unsigned& workUsed)
234 {
235  using LE_P = typename Endian::Little<P>::type;
236 
237  auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
238  auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
239  for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
240  for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
241  P pxor = pnew[x] ^ pold[x];
242  writePixel(pixelOps, pxor, *reinterpret_cast<LE_P*>(&work[workUsed]));
243  workUsed += sizeof(P);
244  }
245  pold += pitch;
246  pnew += pitch;
247  }
248 }
249 
250 template<class P>
251 void ZMBVEncoder::addXorFrame(const SDL_PixelFormat& pixelFormat, unsigned& workUsed)
252 {
253  PixelOperations<P> pixelOps(pixelFormat);
254  auto* vectors = reinterpret_cast<int8_t*>(&work[workUsed]);
255 
256  unsigned xblocks = width / BLOCK_WIDTH;
257  unsigned yblocks = height / BLOCK_HEIGHT;
258  unsigned blockcount = xblocks * yblocks;
259 
260  // Align the following xor data on 4 byte boundary
261  workUsed = (workUsed + blockcount * 2 + 3) & ~3;
262 
263  int bestvx = 0;
264  int bestvy = 0;
265  for (unsigned b = 0; b < blockcount; ++b) {
266  unsigned offset = blockOffsets[b];
267  // first try best vector of previous block
268  unsigned bestchange = compareBlock<P>(bestvx, bestvy, offset);
269  if (bestchange >= 4) {
270  int possibles = 64;
271  for (auto& v : vectorTable) {
272  if (possibleBlock<P>(v.x, v.y, offset) < 4) {
273  unsigned testchange = compareBlock<P>(v.x, v.y, offset);
274  if (testchange < bestchange) {
275  bestchange = testchange;
276  bestvx = v.x;
277  bestvy = v.y;
278  if (bestchange < 4) break;
279  }
280  --possibles;
281  if (possibles == 0) break;
282  }
283  }
284  }
285  vectors[b * 2 + 0] = (bestvx << 1);
286  vectors[b * 2 + 1] = (bestvy << 1);
287  if (bestchange) {
288  vectors[b * 2 + 0] |= 1;
289  addXorBlock<P>(pixelOps, bestvx, bestvy, offset, workUsed);
290  }
291  }
292 }
293 
294 template<class P>
295 void ZMBVEncoder::addFullFrame(const SDL_PixelFormat& pixelFormat, unsigned& workUsed)
296 {
297  using LE_P = typename Endian::Little<P>::type;
298 
299  PixelOperations<P> pixelOps(pixelFormat);
300  auto* readFrame =
301  &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
302  for (unsigned y = 0; y < height; ++y) {
303  auto* pixelsIn = reinterpret_cast<P*> (readFrame);
304  auto* pixelsOut = reinterpret_cast<LE_P*>(&work[workUsed]);
305  for (unsigned x = 0; x < width; ++x) {
306  writePixel(pixelOps, pixelsIn[x], pixelsOut[x]);
307  }
308  readFrame += pitch * sizeof(P);
309  workUsed += width * sizeof(P);
310  }
311 }
312 
313 const void* ZMBVEncoder::getScaledLine(FrameSource* frame, unsigned y, void* workBuf_)
314 {
315 #if HAVE_32BPP
316  if (pixelSize == 4) { // 32bpp
317  auto* workBuf = static_cast<uint32_t*>(workBuf_);
318  switch (height) {
319  case 240:
320  return frame->getLinePtr320_240(y, workBuf);
321  case 480:
322  return frame->getLinePtr640_480(y, workBuf);
323  case 720:
324  return frame->getLinePtr960_720(y, workBuf);
325  default:
326  UNREACHABLE;
327  }
328  }
329 #endif
330 #if HAVE_16BPP
331  if (pixelSize == 2) { // 15bpp or 16bpp
332  auto* workBuf = static_cast<uint16_t*>(workBuf_);
333  switch (height) {
334  case 240:
335  return frame->getLinePtr320_240(y, workBuf);
336  case 480:
337  return frame->getLinePtr640_480(y, workBuf);
338  case 720:
339  return frame->getLinePtr960_720(y, workBuf);
340  default:
341  UNREACHABLE;
342  }
343  }
344 #endif
345  UNREACHABLE;
346  return nullptr; // avoid warning
347 }
348 
349 void ZMBVEncoder::compressFrame(bool keyFrame, FrameSource* frame,
350  void*& buffer, unsigned& written)
351 {
352  std::swap(newframe, oldframe); // replace oldframe with newframe
353 
354  // Reset the work buffer
355  unsigned workUsed = 0;
356  unsigned writeDone = 1;
357  uint8_t* writeBuf = output.data();
358 
359  output[0] = 0; // first byte contains info about this frame
360  if (keyFrame) {
361  output[0] |= FLAG_KEYFRAME;
362  auto* header = reinterpret_cast<KeyframeHeader*>(
363  writeBuf + writeDone);
364  header->high_version = DBZV_VERSION_HIGH;
365  header->low_version = DBZV_VERSION_LOW;
366  header->compression = COMPRESSION_ZLIB;
367  header->format = format;
368  header->blockwidth = BLOCK_WIDTH;
369  header->blockheight = BLOCK_HEIGHT;
370  writeDone += sizeof(KeyframeHeader);
371  deflateReset(&zstream); // restart deflate
372  }
373 
374  // copy lines (to add black border)
375  unsigned linePitch = pitch * pixelSize;
376  unsigned lineWidth = width * pixelSize;
377  uint8_t* dest =
378  &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
379  for (unsigned i = 0; i < height; ++i) {
380  auto* scaled = getScaledLine(frame, i, dest);
381  if (scaled != dest) memcpy(dest, scaled, lineWidth);
382  dest += linePitch;
383  }
384 
385  // Add the frame data.
386  if (keyFrame) {
387  // Key frame: full frame data.
388  switch (pixelSize) {
389 #if HAVE_16BPP
390  case 2:
391  addFullFrame<uint16_t>(frame->getSDLPixelFormat(), workUsed);
392  break;
393 #endif
394 #if HAVE_32BPP
395  case 4:
396  addFullFrame<uint32_t>(frame->getSDLPixelFormat(), workUsed);
397  break;
398 #endif
399  default:
400  UNREACHABLE;
401  }
402  } else {
403  // Non-key frame: delta frame data.
404  switch (pixelSize) {
405 #if HAVE_16BPP
406  case 2:
407  addXorFrame<uint16_t>(frame->getSDLPixelFormat(), workUsed);
408  break;
409 #endif
410 #if HAVE_32BPP
411  case 4:
412  addXorFrame<uint32_t>(frame->getSDLPixelFormat(), workUsed);
413  break;
414 #endif
415  default:
416  UNREACHABLE;
417  }
418  }
419  // Compress the frame data with zlib.
420  zstream.next_in = work.data();
421  zstream.avail_in = workUsed;
422  zstream.total_in = 0;
423 
424  zstream.next_out = static_cast<Bytef*>(writeBuf + writeDone);
425  zstream.avail_out = outputSize - writeDone;
426  zstream.total_out = 0;
427  deflate(&zstream, Z_SYNC_FLUSH);
428 
429  buffer = output.data();
430  written = writeDone + zstream.total_out;
431 }
432 
433 } // namespace openmsx
void swap(optional< T > &x, optional< T > &y) noexcept(noexcept(x.swap(y)))
Definition: optional.hh:816
void compressFrame(bool keyFrame, FrameSource *frame, void *&buffer, unsigned &written)
Definition: ZMBVEncoder.cc:349
unsigned blue256(Pixel p) const
Interface for getting lines from a video frame.
Definition: FrameSource.hh:14
static const char * CODEC_4CC
Definition: ZMBVEncoder.hh:20
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:120
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:90
float cost() const
Definition: ZMBVEncoder.cc:25
void sort(RandomAccessRange &&range)
Definition: ranges.hh:35
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
constexpr bool operator<(const optional< T > &x, const optional< T > &y)
Definition: optional.hh:515
unsigned red256(Pixel p) const
Same as above, but result is scaled to [0..255].
int g
const Pixel * getLinePtr640_480(unsigned line, Pixel *buf) const
Get a pointer to a given line in this frame, the frame is scaled to 640x480 pixels.
Definition: FrameSource.cc:38
ZMBVEncoder(unsigned width, unsigned height, unsigned bpp)
Definition: ZMBVEncoder.cc:119
const Pixel * getLinePtr320_240(unsigned line, Pixel *buf) const
Get a pointer to a given line in this frame, the frame is scaled to 320x240 pixels.
Definition: FrameSource.cc:21
CodecVector vectorTable[VECTOR_TAB_SIZE]
Definition: ZMBVEncoder.cc:51
const Pixel * getLinePtr960_720(unsigned line, Pixel *buf) const
Get a pointer to a given line in this frame, the frame is scaled to 960x720 pixels.
Definition: FrameSource.cc:49
const SDL_PixelFormat & getSDLPixelFormat() const
Definition: FrameSource.hh:190
unsigned green256(Pixel p) const
#define UNREACHABLE
Definition: unreachable.hh:38