openMSX
ZMBVEncoder.cc
Go to the documentation of this file.
1// Code based on DOSBox-0.65
2
3#include "ZMBVEncoder.hh"
4#include "FrameSource.hh"
5#include "PixelOperations.hh"
6#include "cstd.hh"
7#include "endian.hh"
8#include "narrow.hh"
9#include "ranges.hh"
10#include "unreachable.hh"
11#include <array>
12#include <cassert>
13#include <cstdint>
14#include <cstdlib>
15#include <cstring>
16#include <cmath>
17#include <tuple>
18
19namespace openmsx {
20
// Stream version; both bytes are stored in the header of every key frame.
static constexpr uint8_t DBZV_VERSION_HIGH = 0;
static constexpr uint8_t DBZV_VERSION_LOW  = 1;

// Compression-method id stored in the key frame header.
static constexpr uint8_t COMPRESSION_ZLIB = 1;

// Largest motion-vector component (in pixels). It also determines the width
// of the border that is kept around the frame buffers.
static constexpr unsigned MAX_VECTOR = 16;

// Dimensions (in pixels) of the blocks used for motion search.
static constexpr unsigned BLOCK_WIDTH  = MAX_VECTOR;
static constexpr unsigned BLOCK_HEIGHT = MAX_VECTOR;

// Bit set in the first output byte of a frame to mark it as a key frame.
static constexpr unsigned FLAG_KEYFRAME = 0x01;
28
// A motion vector: the (x, y) displacement, in pixels, at which a block of
// the previous frame is compared against a block of the current frame.
struct CodecVector {
	int8_t x; // horizontal displacement
	int8_t y; // vertical displacement
};
33
34static constexpr unsigned VECTOR_TAB_SIZE =
35 1 + // center
36 8 * MAX_VECTOR + // horizontal, vertical, diagonal
37 MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
38
39static constexpr auto vectorTable = [] {
40 std::array<CodecVector, VECTOR_TAB_SIZE> result = {};
41
42 unsigned p = 0;
43 // center
44 result[p] = {0, 0};
45 p += 1;
46 // horizontal, vertical, diagonal
47 for (int i = 1; i <= int(MAX_VECTOR); ++i, p += 8) {
48 result[p + 0] = {int8_t( i), int8_t( 0)};
49 result[p + 1] = {int8_t(-i), int8_t( 0)};
50 result[p + 2] = {int8_t( 0), int8_t( i)};
51 result[p + 3] = {int8_t( 0), int8_t(-i)};
52 result[p + 4] = {int8_t( i), int8_t( i)};
53 result[p + 5] = {int8_t(-i), int8_t( i)};
54 result[p + 6] = {int8_t( i), int8_t(-i)};
55 result[p + 7] = {int8_t(-i), int8_t(-i)};
56 }
57 // rest
58 for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
59 for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
60 if (x == y) continue; // already have diagonal
61 result[p + 0] = {int8_t( x), int8_t( y)};
62 result[p + 1] = {int8_t(-x), int8_t( y)};
63 result[p + 2] = {int8_t( x), int8_t(-y)};
64 result[p + 3] = {int8_t(-x), int8_t(-y)};
65 p += 4;
66 }
67 }
68 assert(p == VECTOR_TAB_SIZE);
69
70 // sort
71 auto compare = [](const CodecVector& l, const CodecVector& r) {
72 auto cost = [](const CodecVector& v) {
73 auto c = cstd::sqrt(double(v.x * v.x + v.y * v.y));
74 if ((v.x == 0) || (v.y == 0)) {
75 // no penalty for purely horizontal/vertical offset
76 c *= 1.0;
77 } else if (cstd::abs(v.x) == cstd::abs(v.y)) {
78 // small penalty for pure diagonal
79 c *= 2.0;
80 } else {
81 // bigger penalty for 'random' direction
82 c *= 4.0;
83 }
84 return c;
85 };
86 return std::tuple(cost(l), l.x, l.y) <
87 std::tuple(cost(r), r.x, r.y);
88 };
89 ranges::sort(result, compare);
90
91 return result;
92}();
93
// Header that is written directly after the flag byte of every key frame.
struct KeyframeHeader {
	uint8_t high_version; // DBZV_VERSION_HIGH
	uint8_t low_version;  // DBZV_VERSION_LOW
	uint8_t compression;  // COMPRESSION_ZLIB
	uint8_t format;       // pixel format id of the encoded stream
	uint8_t blockWidth;   // BLOCK_WIDTH
	uint8_t blockHeight;  // BLOCK_HEIGHT
};
// The header is written into the output by overlaying this struct on the
// byte buffer, so it must not contain any padding.
static_assert(sizeof(KeyframeHeader) == 6, "KeyframeHeader must be 6 bytes");
102
103
104static inline void writePixel(
105 const PixelOperations<uint16_t>& pixelOps,
106 uint16_t pixel, Endian::L16& dest)
107{
108 unsigned r = pixelOps.red256(pixel);
109 unsigned g = pixelOps.green256(pixel);
110 unsigned b = pixelOps.blue256(pixel);
111 dest = narrow<uint16_t>(((r & 0xF8) << (11 - 3)) | ((g & 0xFC) << (5 - 2)) | (b >> 3));
112}
113
114static inline void writePixel(
115 const PixelOperations<unsigned>& pixelOps,
116 unsigned pixel, Endian::L32& dest)
117{
118 unsigned r = pixelOps.red256(pixel);
119 unsigned g = pixelOps.green256(pixel);
120 unsigned b = pixelOps.blue256(pixel);
121 dest = (r << 16) | (g << 8) | b;
122}
123
124
125ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_, unsigned bpp)
126 : width(width_)
127 , height(height_)
128{
129 setupBuffers(bpp);
130 memset(&zstream, 0, sizeof(zstream));
131 deflateInit(&zstream, 6); // compression level
132
133 // I did a small test: compression level vs compression speed
134 // (recorded Space Manbow intro, video only)
135 //
136 // level | time | size
137 // ------+--------+----------
138 // 0 | 1m12.6 | 139442594
139 // 1 | 1m12.1 | 5217288
140 // 2 | 1m10.8 | 4887258
141 // 3 | 1m11.8 | 4610668
142 // 4 | 1m13.1 | 3791932 <-- old default
143 // 5 | 1m14.2 | 3602078
144 // 6 | 1m14.5 | 3363766 <-- current default
145 // 7 | 1m15.8 | 3333938
146 // 8 | 1m25.0 | 3301168
147 // 9 | 2m04.1 | 3253706
148 //
149 // Level 6 seems a good compromise between size/speed for THIS test.
150}
151
152void ZMBVEncoder::setupBuffers(unsigned bpp)
153{
154 switch (bpp) {
155#if HAVE_16BPP
156 case 15:
157 case 16:
158 format = ZMBV_FORMAT_16BPP;
159 pixelSize = 2;
160 break;
161#endif
162#if HAVE_32BPP
163 case 32:
164 format = ZMBV_FORMAT_32BPP;
165 pixelSize = 4;
166 break;
167#endif
168 default:
170 }
171
172 pitch = width + 2 * MAX_VECTOR;
173 auto bufSize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
174
175 oldFrame.resize(bufSize);
176 newFrame.resize(bufSize);
177 ranges::fill(std::span{oldFrame.data(), bufSize}, 0);
178 ranges::fill(std::span{newFrame.data(), bufSize}, 0);
179 work.resize(bufSize);
180 outputSize = neededSize();
181 output.resize(outputSize);
182
183 assert((width % BLOCK_WIDTH ) == 0);
184 assert((height % BLOCK_HEIGHT) == 0);
185 size_t xBlocks = width / BLOCK_WIDTH;
186 size_t yBlocks = height / BLOCK_HEIGHT;
187 blockOffsets.resize(xBlocks * yBlocks);
188 for (auto y : xrange(yBlocks)) {
189 for (auto x : xrange(xBlocks)) {
190 blockOffsets[y * xBlocks + x] =
191 ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
192 (x * BLOCK_WIDTH) + MAX_VECTOR;
193 }
194 }
195}
196
197unsigned ZMBVEncoder::neededSize() const
198{
199 unsigned f = pixelSize;
200 f = f * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
201 return f + f / 1000;
202}
203
204template<std::unsigned_integral P>
205unsigned ZMBVEncoder::possibleBlock(int vx, int vy, size_t offset)
206{
207 int ret = 0;
208 auto* pOld = &(reinterpret_cast<P*>(oldFrame.data()))[offset + (vy * pitch) + vx];
209 auto* pNew = &(reinterpret_cast<P*>(newFrame.data()))[offset];
210 for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
211 for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
212 if (pOld[x] != pNew[x]) ++ret;
213 }
214 pOld += pitch * 4;
215 pNew += pitch * 4;
216 }
217 return ret;
218}
219
220template<std::unsigned_integral P>
221unsigned ZMBVEncoder::compareBlock(int vx, int vy, size_t offset)
222{
223 int ret = 0;
224 auto* pOld = &(reinterpret_cast<P*>(oldFrame.data()))[offset + (vy * pitch) + vx];
225 auto* pNew = &(reinterpret_cast<P*>(newFrame.data()))[offset];
226 repeat(BLOCK_HEIGHT, [&] {
227 for (auto x : xrange(BLOCK_WIDTH)) {
228 if (pOld[x] != pNew[x]) ++ret;
229 }
230 pOld += pitch;
231 pNew += pitch;
232 });
233 return ret;
234}
235
236template<std::unsigned_integral P>
237void ZMBVEncoder::addXorBlock(
238 const PixelOperations<P>& pixelOps, int vx, int vy, size_t offset, unsigned& workUsed)
239{
240 using LE_P = typename Endian::Little<P>::type;
241
242 auto* pOld = &(reinterpret_cast<P*>(oldFrame.data()))[offset + (vy * pitch) + vx];
243 auto* pNew = &(reinterpret_cast<P*>(newFrame.data()))[offset];
244 repeat(BLOCK_HEIGHT, [&] {
245 for (auto x : xrange(BLOCK_WIDTH)) {
246 P pXor = pNew[x] ^ pOld[x];
247 writePixel(pixelOps, pXor, *reinterpret_cast<LE_P*>(&work[workUsed]));
248 workUsed += sizeof(P);
249 }
250 pOld += pitch;
251 pNew += pitch;
252 });
253}
254
255template<std::unsigned_integral P>
256void ZMBVEncoder::addXorFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
257{
258 PixelOperations<P> pixelOps(pixelFormat);
259 auto* vectors = reinterpret_cast<int8_t*>(&work[workUsed]);
260
261 unsigned xBlocks = width / BLOCK_WIDTH;
262 unsigned yBlocks = height / BLOCK_HEIGHT;
263 unsigned blockCount = xBlocks * yBlocks;
264
265 // Align the following xor data on 4 byte boundary
266 workUsed = (workUsed + blockCount * 2 + 3) & ~3;
267
268 int bestVx = 0;
269 int bestVy = 0;
270 for (auto b : xrange(blockCount)) {
271 auto offset = blockOffsets[b];
272 // first try best vector of previous block
273 unsigned bestChange = compareBlock<P>(bestVx, bestVy, offset);
274 if (bestChange >= 4) {
275 int possibles = 64;
276 for (const auto& v : vectorTable) {
277 if (possibleBlock<P>(v.x, v.y, offset) < 4) {
278 unsigned testChange = compareBlock<P>(v.x, v.y, offset);
279 if (testChange < bestChange) {
280 bestChange = testChange;
281 bestVx = narrow<int>(v.x);
282 bestVy = narrow<int>(v.y);
283 if (bestChange < 4) break;
284 }
285 --possibles;
286 if (possibles == 0) break;
287 }
288 }
289 }
290 vectors[b * 2 + 0] = narrow<int8_t>(bestVx << 1);
291 vectors[b * 2 + 1] = narrow<int8_t>(bestVy << 1);
292 if (bestChange) {
293 vectors[b * 2 + 0] |= 1;
294 addXorBlock<P>(pixelOps, bestVx, bestVy, offset, workUsed);
295 }
296 }
297}
298
299template<std::unsigned_integral P>
300void ZMBVEncoder::addFullFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
301{
302 using LE_P = typename Endian::Little<P>::type;
303
304 PixelOperations<P> pixelOps(pixelFormat);
305 auto* readFrame =
306 &newFrame[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
307 repeat(height, [&] {
308 auto* pixelsIn = reinterpret_cast<P*> (readFrame);
309 auto* pixelsOut = reinterpret_cast<LE_P*>(&work[workUsed]);
310 for (auto x : xrange(width)) {
311 writePixel(pixelOps, pixelsIn[x], pixelsOut[x]);
312 }
313 readFrame += pitch * sizeof(P);
314 workUsed += narrow<unsigned>(width * sizeof(P));
315 });
316}
317
318const void* ZMBVEncoder::getScaledLine(FrameSource* frame, unsigned y, void* workBuf_) const
319{
320#if HAVE_32BPP
321 if (pixelSize == 4) { // 32bpp
322 auto* workBuf = static_cast<uint32_t*>(workBuf_);
323 switch (height) {
324 case 240:
325 return frame->getLinePtr320_240(y, std::span<uint32_t, 320>(workBuf, 320)).data();
326 case 480:
327 return frame->getLinePtr640_480(y, std::span<uint32_t, 640>(workBuf, 640)).data();
328 case 720:
329 return frame->getLinePtr960_720(y, std::span<uint32_t, 960>(workBuf, 960)).data();
330 default:
332 }
333 }
334#endif
335#if HAVE_16BPP
336 if (pixelSize == 2) { // 15bpp or 16bpp
337 auto* workBuf = static_cast<uint16_t*>(workBuf_);
338 switch (height) {
339 case 240:
340 return frame->getLinePtr320_240(y, std::span<uint16_t, 320>(workBuf, 320)).data();
341 case 480:
342 return frame->getLinePtr640_480(y, std::span<uint16_t, 640>(workBuf, 640)).data();
343 case 720:
344 return frame->getLinePtr960_720(y, std::span<uint16_t, 960>(workBuf, 960)).data();
345 default:
347 }
348 }
349#endif
351 return nullptr; // avoid warning
352}
353
354std::span<const uint8_t> ZMBVEncoder::compressFrame(bool keyFrame, FrameSource* frame)
355{
356 std::swap(newFrame, oldFrame); // replace oldFrame with newFrame
357
358 // Reset the work buffer
359 unsigned workUsed = 0;
360 unsigned writeDone = 1;
361 uint8_t* writeBuf = output.data();
362
363 output[0] = 0; // first byte contains info about this frame
364 if (keyFrame) {
365 output[0] |= FLAG_KEYFRAME;
366 auto* header = reinterpret_cast<KeyframeHeader*>(
367 writeBuf + writeDone);
368 header->high_version = DBZV_VERSION_HIGH;
369 header->low_version = DBZV_VERSION_LOW;
370 header->compression = COMPRESSION_ZLIB;
371 header->format = format;
372 header->blockWidth = BLOCK_WIDTH;
373 header->blockHeight = BLOCK_HEIGHT;
374 writeDone += sizeof(KeyframeHeader);
375 deflateReset(&zstream); // restart deflate
376 }
377
378 // copy lines (to add black border)
379 auto linePitch = pitch * pixelSize;
380 auto lineWidth = size_t(width) * pixelSize;
381 uint8_t* dest =
382 &newFrame[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
383 for (auto i : xrange(height)) {
384 const auto* scaled = getScaledLine(frame, i, dest);
385 if (scaled != dest) memcpy(dest, scaled, lineWidth);
386 dest += linePitch;
387 }
388
389 // Add the frame data.
390 if (keyFrame) {
391 // Key frame: full frame data.
392 switch (pixelSize) {
393#if HAVE_16BPP
394 case 2:
395 addFullFrame<uint16_t>(frame->getPixelFormat(), workUsed);
396 break;
397#endif
398#if HAVE_32BPP
399 case 4:
400 addFullFrame<uint32_t>(frame->getPixelFormat(), workUsed);
401 break;
402#endif
403 default:
405 }
406 } else {
407 // Non-key frame: delta frame data.
408 switch (pixelSize) {
409#if HAVE_16BPP
410 case 2:
411 addXorFrame<uint16_t>(frame->getPixelFormat(), workUsed);
412 break;
413#endif
414#if HAVE_32BPP
415 case 4:
416 addXorFrame<uint32_t>(frame->getPixelFormat(), workUsed);
417 break;
418#endif
419 default:
421 }
422 }
423 // Compress the frame data with zlib.
424 zstream.next_in = work.data();
425 zstream.avail_in = workUsed;
426 zstream.total_in = 0;
427
428 zstream.next_out = static_cast<Bytef*>(writeBuf + writeDone);
429 zstream.avail_out = outputSize - writeDone;
430 zstream.total_out = 0;
431 auto r = deflate(&zstream, Z_SYNC_FLUSH);
432 assert(r == Z_OK); (void)r;
433
434 return {output.data(), writeDone + zstream.total_out};
435}
436
437} // namespace openmsx
int g
Interface for getting lines from a video frame.
Definition: FrameSource.hh:20
const PixelFormat & getPixelFormat() const
Definition: FrameSource.hh:151
void resize(size_t size)
Grow or shrink the memory block.
Definition: MemBuffer.hh:111
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:81
unsigned blue256(Pixel p) const
unsigned red256(Pixel p) const
Same as above, but result is scaled to [0..255].
unsigned green256(Pixel p) const
std::span< const uint8_t > compressFrame(bool keyFrame, FrameSource *frame)
Definition: ZMBVEncoder.cc:354
ZMBVEncoder(unsigned width, unsigned height, unsigned bpp)
Definition: ZMBVEncoder.cc:125
constexpr double sqrt(double x)
Definition: cstd.hh:261
constexpr T abs(T t)
Definition: cstd.hh:17
This file implemented 3 utility functions:
Definition: Autofire.cc:9
Pixel writePixel(uint32_t p)
Definition: HQCommon.hh:32
constexpr void fill(ForwardRange &&range, const T &value)
Definition: ranges.hh:287
constexpr void sort(RandomAccessRange &&range)
Definition: ranges.hh:49
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
#define UNREACHABLE
Definition: unreachable.hh:38
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
Definition: xrange.hh:147
constexpr auto xrange(T e)
Definition: xrange.hh:132