openMSX
ZMBVEncoder.cc
Go to the documentation of this file.
1 // Code based on DOSBox-0.65
2 
3 #include "ZMBVEncoder.hh"
4 #include "FrameSource.hh"
5 #include "PixelOperations.hh"
6 #include "endian.hh"
7 #include "ranges.hh"
8 #include "unreachable.hh"
9 #include <cassert>
10 #include <cstdlib>
11 #include <cstring>
12 #include <cmath>
13 
14 namespace openmsx {
15 
16 constexpr uint8_t DBZV_VERSION_HIGH = 0;
17 constexpr uint8_t DBZV_VERSION_LOW = 1;
18 constexpr uint8_t COMPRESSION_ZLIB = 1;
19 constexpr unsigned MAX_VECTOR = 16;
20 constexpr unsigned BLOCK_WIDTH = MAX_VECTOR;
21 constexpr unsigned BLOCK_HEIGHT = MAX_VECTOR;
22 constexpr unsigned FLAG_KEYFRAME = 0x01;
23 
/** A candidate motion vector (displacement in pixels) together with the
  * heuristic used to rank candidates.
  */
struct CodecVector {
    /** Heuristic cost: the euclidean length of the vector, scaled by a
      * direction-dependent weight.
      *  - purely horizontal or vertical (or zero): weight 1
      *  - pure diagonal (|x| == |y|):              weight 2
      *  - any other direction:                     weight 4
      */
    float cost() const {
        const float length = sqrtf(float(x * x + y * y));
        float weight = 4.0f; // 'random' direction
        if ((x == 0) || (y == 0)) {
            weight = 1.0f; // axis aligned: no penalty
        } else if (abs(x) == abs(y)) {
            weight = 2.0f; // diagonal: small penalty
        }
        return length * weight;
    }
    int x;
    int y;
};
42 static inline bool operator<(const CodecVector& l, const CodecVector& r)
43 {
44  return l.cost() < r.cost();
45 }
46 
47 constexpr unsigned VECTOR_TAB_SIZE =
48  1 + // center
49  8 * MAX_VECTOR + // horizontal, vertial, diagonal
50  MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
52 
54  uint8_t high_version;
55  uint8_t low_version;
56  uint8_t compression;
57  uint8_t format;
58  uint8_t blockwidth;
59  uint8_t blockheight;
60 };
61 
62 
63 static inline void writePixel(
64  const PixelOperations<uint16_t>& pixelOps,
65  uint16_t pixel, Endian::L16& dest)
66 {
67  unsigned r = pixelOps.red256(pixel);
68  unsigned g = pixelOps.green256(pixel);
69  unsigned b = pixelOps.blue256(pixel);
70  dest = ((r & 0xF8) << (11 - 3)) | ((g & 0xFC) << (5 - 2)) | (b >> 3);
71 }
72 
73 static inline void writePixel(
74  const PixelOperations<unsigned>& pixelOps,
75  unsigned pixel, Endian::L32& dest)
76 {
77  unsigned r = pixelOps.red256(pixel);
78  unsigned g = pixelOps.green256(pixel);
79  unsigned b = pixelOps.blue256(pixel);
80  dest = (r << 16) | (g << 8) | b;
81 }
82 
83 static void createVectorTable()
84 {
85  unsigned p = 0;
86  // center
87  vectorTable[p] = {0, 0};
88  p += 1;
89  // horizontal, vertial, diagonal
90  for (int i = 1; i <= int(MAX_VECTOR); ++i) {
91  vectorTable[p + 0] = { i, 0};
92  vectorTable[p + 1] = {-i, 0};
93  vectorTable[p + 2] = { 0, i};
94  vectorTable[p + 3] = { 0,-i};
95  vectorTable[p + 4] = { i, i};
96  vectorTable[p + 5] = {-i, i};
97  vectorTable[p + 6] = { i,-i};
98  vectorTable[p + 7] = {-i,-i};
99  p += 8;
100  }
101  // rest
102  for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
103  for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
104  if (x == y) continue; // already have diagonal
105  vectorTable[p + 0] = { x, y};
106  vectorTable[p + 1] = {-x, y};
107  vectorTable[p + 2] = { x,-y};
108  vectorTable[p + 3] = {-x,-y};
109  p += 4;
110  }
111  }
112  assert(p == VECTOR_TAB_SIZE);
113 
115 }
116 
117 ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_, unsigned bpp)
118  : width(width_)
119  , height(height_)
120 {
121  setupBuffers(bpp);
122  createVectorTable();
123  memset(&zstream, 0, sizeof(zstream));
124  deflateInit(&zstream, 6); // compression level
125 
126  // I did a small test: compression level vs compression speed
127  // (recorded Space Manbow intro, video only)
128  //
129  // level | time | size
130  // ------+--------+----------
131  // 0 | 1m12.6 | 139442594
132  // 1 | 1m12.1 | 5217288
133  // 2 | 1m10.8 | 4887258
134  // 3 | 1m11.8 | 4610668
135  // 4 | 1m13.1 | 3791932 <-- old default
136  // 5 | 1m14.2 | 3602078
137  // 6 | 1m14.5 | 3363766 <-- current default
138  // 7 | 1m15.8 | 3333938
139  // 8 | 1m25.0 | 3301168
140  // 9 | 2m04.1 | 3253706
141  //
142  // Level 6 seems a good compromise between size/speed for THIS test.
143 }
144 
145 void ZMBVEncoder::setupBuffers(unsigned bpp)
146 {
147  switch (bpp) {
148 #if HAVE_16BPP
149  case 15:
150  case 16:
151  format = ZMBV_FORMAT_16BPP;
152  pixelSize = 2;
153  break;
154 #endif
155 #if HAVE_32BPP
156  case 32:
157  format = ZMBV_FORMAT_32BPP;
158  pixelSize = 4;
159  break;
160 #endif
161  default:
162  UNREACHABLE;
163  }
164 
165  pitch = width + 2 * MAX_VECTOR;
166  unsigned bufsize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
167 
168  oldframe.resize(bufsize);
169  newframe.resize(bufsize);
170  memset(oldframe.data(), 0, bufsize);
171  memset(newframe.data(), 0, bufsize);
172  work.resize(bufsize);
173  outputSize = neededSize();
174  output.resize(outputSize);
175 
176  assert((width % BLOCK_WIDTH ) == 0);
177  assert((height % BLOCK_HEIGHT) == 0);
178  unsigned xblocks = width / BLOCK_WIDTH;
179  unsigned yblocks = height / BLOCK_HEIGHT;
180  blockOffsets.resize(xblocks * yblocks);
181  for (unsigned y = 0; y < yblocks; ++y) {
182  for (unsigned x = 0; x < xblocks; ++x) {
183  blockOffsets[y * xblocks + x] =
184  ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
185  (x * BLOCK_WIDTH) + MAX_VECTOR;
186  }
187  }
188 }
189 
190 unsigned ZMBVEncoder::neededSize() const
191 {
192  unsigned f = pixelSize;
193  f = f * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
194  return f + f / 1000;
195 }
196 
197 template<class P>
198 unsigned ZMBVEncoder::possibleBlock(int vx, int vy, unsigned offset)
199 {
200  int ret = 0;
201  auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
202  auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
203  for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
204  for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
205  if (pold[x] != pnew[x]) ++ret;
206  }
207  pold += pitch * 4;
208  pnew += pitch * 4;
209  }
210  return ret;
211 }
212 
213 template<class P>
214 unsigned ZMBVEncoder::compareBlock(int vx, int vy, unsigned offset)
215 {
216  int ret = 0;
217  auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
218  auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
219  for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
220  for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
221  if (pold[x] != pnew[x]) ++ret;
222  }
223  pold += pitch;
224  pnew += pitch;
225  }
226  return ret;
227 }
228 
229 template<class P>
230 void ZMBVEncoder::addXorBlock(
231  const PixelOperations<P>& pixelOps, int vx, int vy, unsigned offset, unsigned& workUsed)
232 {
233  using LE_P = typename Endian::Little<P>::type;
234 
235  auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
236  auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
237  for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
238  for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
239  P pxor = pnew[x] ^ pold[x];
240  writePixel(pixelOps, pxor, *reinterpret_cast<LE_P*>(&work[workUsed]));
241  workUsed += sizeof(P);
242  }
243  pold += pitch;
244  pnew += pitch;
245  }
246 }
247 
248 template<class P>
249 void ZMBVEncoder::addXorFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
250 {
251  PixelOperations<P> pixelOps(pixelFormat);
252  auto* vectors = reinterpret_cast<int8_t*>(&work[workUsed]);
253 
254  unsigned xblocks = width / BLOCK_WIDTH;
255  unsigned yblocks = height / BLOCK_HEIGHT;
256  unsigned blockcount = xblocks * yblocks;
257 
258  // Align the following xor data on 4 byte boundary
259  workUsed = (workUsed + blockcount * 2 + 3) & ~3;
260 
261  int bestvx = 0;
262  int bestvy = 0;
263  for (unsigned b = 0; b < blockcount; ++b) {
264  unsigned offset = blockOffsets[b];
265  // first try best vector of previous block
266  unsigned bestchange = compareBlock<P>(bestvx, bestvy, offset);
267  if (bestchange >= 4) {
268  int possibles = 64;
269  for (auto& v : vectorTable) {
270  if (possibleBlock<P>(v.x, v.y, offset) < 4) {
271  unsigned testchange = compareBlock<P>(v.x, v.y, offset);
272  if (testchange < bestchange) {
273  bestchange = testchange;
274  bestvx = v.x;
275  bestvy = v.y;
276  if (bestchange < 4) break;
277  }
278  --possibles;
279  if (possibles == 0) break;
280  }
281  }
282  }
283  vectors[b * 2 + 0] = (bestvx << 1);
284  vectors[b * 2 + 1] = (bestvy << 1);
285  if (bestchange) {
286  vectors[b * 2 + 0] |= 1;
287  addXorBlock<P>(pixelOps, bestvx, bestvy, offset, workUsed);
288  }
289  }
290 }
291 
292 template<class P>
293 void ZMBVEncoder::addFullFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
294 {
295  using LE_P = typename Endian::Little<P>::type;
296 
297  PixelOperations<P> pixelOps(pixelFormat);
298  auto* readFrame =
299  &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
300  for (unsigned y = 0; y < height; ++y) {
301  auto* pixelsIn = reinterpret_cast<P*> (readFrame);
302  auto* pixelsOut = reinterpret_cast<LE_P*>(&work[workUsed]);
303  for (unsigned x = 0; x < width; ++x) {
304  writePixel(pixelOps, pixelsIn[x], pixelsOut[x]);
305  }
306  readFrame += pitch * sizeof(P);
307  workUsed += width * sizeof(P);
308  }
309 }
310 
311 const void* ZMBVEncoder::getScaledLine(FrameSource* frame, unsigned y, void* workBuf_) const
312 {
313 #if HAVE_32BPP
314  if (pixelSize == 4) { // 32bpp
315  auto* workBuf = static_cast<uint32_t*>(workBuf_);
316  switch (height) {
317  case 240:
318  return frame->getLinePtr320_240(y, workBuf);
319  case 480:
320  return frame->getLinePtr640_480(y, workBuf);
321  case 720:
322  return frame->getLinePtr960_720(y, workBuf);
323  default:
324  UNREACHABLE;
325  }
326  }
327 #endif
328 #if HAVE_16BPP
329  if (pixelSize == 2) { // 15bpp or 16bpp
330  auto* workBuf = static_cast<uint16_t*>(workBuf_);
331  switch (height) {
332  case 240:
333  return frame->getLinePtr320_240(y, workBuf);
334  case 480:
335  return frame->getLinePtr640_480(y, workBuf);
336  case 720:
337  return frame->getLinePtr960_720(y, workBuf);
338  default:
339  UNREACHABLE;
340  }
341  }
342 #endif
343  UNREACHABLE;
344  return nullptr; // avoid warning
345 }
346 
347 void ZMBVEncoder::compressFrame(bool keyFrame, FrameSource* frame,
348  void*& buffer, unsigned& written)
349 {
350  std::swap(newframe, oldframe); // replace oldframe with newframe
351 
352  // Reset the work buffer
353  unsigned workUsed = 0;
354  unsigned writeDone = 1;
355  uint8_t* writeBuf = output.data();
356 
357  output[0] = 0; // first byte contains info about this frame
358  if (keyFrame) {
359  output[0] |= FLAG_KEYFRAME;
360  auto* header = reinterpret_cast<KeyframeHeader*>(
361  writeBuf + writeDone);
363  header->low_version = DBZV_VERSION_LOW;
364  header->compression = COMPRESSION_ZLIB;
365  header->format = format;
366  header->blockwidth = BLOCK_WIDTH;
367  header->blockheight = BLOCK_HEIGHT;
368  writeDone += sizeof(KeyframeHeader);
369  deflateReset(&zstream); // restart deflate
370  }
371 
372  // copy lines (to add black border)
373  unsigned linePitch = pitch * pixelSize;
374  unsigned lineWidth = width * pixelSize;
375  uint8_t* dest =
376  &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
377  for (unsigned i = 0; i < height; ++i) {
378  auto* scaled = getScaledLine(frame, i, dest);
379  if (scaled != dest) memcpy(dest, scaled, lineWidth);
380  dest += linePitch;
381  }
382 
383  // Add the frame data.
384  if (keyFrame) {
385  // Key frame: full frame data.
386  switch (pixelSize) {
387 #if HAVE_16BPP
388  case 2:
389  addFullFrame<uint16_t>(frame->getPixelFormat(), workUsed);
390  break;
391 #endif
392 #if HAVE_32BPP
393  case 4:
394  addFullFrame<uint32_t>(frame->getPixelFormat(), workUsed);
395  break;
396 #endif
397  default:
398  UNREACHABLE;
399  }
400  } else {
401  // Non-key frame: delta frame data.
402  switch (pixelSize) {
403 #if HAVE_16BPP
404  case 2:
405  addXorFrame<uint16_t>(frame->getPixelFormat(), workUsed);
406  break;
407 #endif
408 #if HAVE_32BPP
409  case 4:
410  addXorFrame<uint32_t>(frame->getPixelFormat(), workUsed);
411  break;
412 #endif
413  default:
414  UNREACHABLE;
415  }
416  }
417  // Compress the frame data with zlib.
418  zstream.next_in = work.data();
419  zstream.avail_in = workUsed;
420  zstream.total_in = 0;
421 
422  zstream.next_out = static_cast<Bytef*>(writeBuf + writeDone);
423  zstream.avail_out = outputSize - writeDone;
424  zstream.total_out = 0;
425  auto r = deflate(&zstream, Z_SYNC_FLUSH);
426  assert(r == Z_OK); (void)r;
427 
428  buffer = output.data();
429  written = writeDone + zstream.total_out;
430 }
431 
432 } // namespace openmsx
openmsx::BLOCK_HEIGHT
constexpr unsigned BLOCK_HEIGHT
Definition: ZMBVEncoder.cc:21
openmsx::CodecVector
Definition: ZMBVEncoder.cc:24
openmsx::ZMBVEncoder::ZMBVEncoder
ZMBVEncoder(unsigned width, unsigned height, unsigned bpp)
Definition: ZMBVEncoder.cc:117
openmsx::PixelOperations< uint16_t >
openmsx::KeyframeHeader::blockwidth
uint8_t blockwidth
Definition: ZMBVEncoder.cc:58
openmsx::vectorTable
CodecVector vectorTable[VECTOR_TAB_SIZE]
Definition: ZMBVEncoder.cc:51
ranges::sort
void sort(RandomAccessRange &&range)
Definition: ranges.hh:35
FrameSource.hh
openmsx::PixelOperations::blue256
unsigned blue256(Pixel p) const
Definition: PixelOperations.hh:331
openmsx::KeyframeHeader::compression
uint8_t compression
Definition: ZMBVEncoder.cc:56
ZMBVEncoder.hh
ranges.hh
openmsx::COMPRESSION_ZLIB
constexpr uint8_t COMPRESSION_ZLIB
Definition: ZMBVEncoder.cc:18
openmsx::FLAG_KEYFRAME
constexpr unsigned FLAG_KEYFRAME
Definition: ZMBVEncoder.cc:22
openmsx::DBZV_VERSION_LOW
constexpr uint8_t DBZV_VERSION_LOW
Definition: ZMBVEncoder.cc:17
UNREACHABLE
#define UNREACHABLE
Definition: unreachable.hh:38
openmsx::ZMBVEncoder::compressFrame
void compressFrame(bool keyFrame, FrameSource *frame, void *&buffer, unsigned &written)
Definition: ZMBVEncoder.cc:347
openmsx::MemBuffer::resize
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:120
openmsx::KeyframeHeader::high_version
uint8_t high_version
Definition: ZMBVEncoder.cc:54
openmsx::CodecVector::cost
float cost() const
Definition: ZMBVEncoder.cc:25
openmsx::KeyframeHeader::blockheight
uint8_t blockheight
Definition: ZMBVEncoder.cc:59
openmsx::VECTOR_TAB_SIZE
constexpr unsigned VECTOR_TAB_SIZE
Definition: ZMBVEncoder.cc:47
openmsx::PixelOperations::green256
unsigned green256(Pixel p) const
Definition: PixelOperations.hh:322
openmsx::FrameSource::getPixelFormat
const PixelFormat & getPixelFormat() const
Definition: FrameSource.hh:189
openmsx::MAX_VECTOR
constexpr unsigned MAX_VECTOR
Definition: ZMBVEncoder.cc:19
endian.hh
Endian::EndianT
Definition: endian.hh:71
g
int g
Definition: ScopedAssign_test.cc:20
PixelOperations.hh
openmsx::KeyframeHeader::format
uint8_t format
Definition: ZMBVEncoder.cc:57
openmsx::CodecVector::x
int x
Definition: ZMBVEncoder.cc:39
openmsx::DBZV_VERSION_HIGH
constexpr uint8_t DBZV_VERSION_HIGH
Definition: ZMBVEncoder.cc:16
openmsx::x
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:1419
operator<
constexpr bool operator<(const uint128 &a, const uint128 &b)
Definition: uint128.hh:222
Endian::Little
Definition: endian.hh:280
openmsx::FrameSource
Interface for getting lines from a video frame.
Definition: FrameSource.hh:13
openmsx::PixelOperations::red256
unsigned red256(Pixel p) const
Same as above, but result is scaled to [0..255].
Definition: PixelOperations.hh:313
openmsx::MemBuffer::data
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:90
openmsx::BLOCK_WIDTH
constexpr unsigned BLOCK_WIDTH
Definition: ZMBVEncoder.cc:20
unreachable.hh
openmsx::KeyframeHeader::low_version
uint8_t low_version
Definition: ZMBVEncoder.cc:55
openmsx
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
openmsx::CodecVector::y
int y
Definition: ZMBVEncoder.cc:40
openmsx::KeyframeHeader
Definition: ZMBVEncoder.cc:53