openMSX
ZMBVEncoder.cc
Go to the documentation of this file.
1// Code based on DOSBox-0.65
2
3#include "ZMBVEncoder.hh"
4
5#include "FrameSource.hh"
6#include "PixelOperations.hh"
7
8#include "cstd.hh"
9#include "endian.hh"
10#include "narrow.hh"
11#include "ranges.hh"
12#include "unreachable.hh"
13
14#include <array>
15#include <bit>
16#include <cassert>
17#include <cstdint>
18#include <cstdlib>
19#include <cstring>
20#include <cmath>
21#include <tuple>
22
23namespace openmsx {
24
// Version and compression identifiers that compressFrame() stores in the
// header of every key frame.
static constexpr uint8_t DBZV_VERSION_HIGH{0};
static constexpr uint8_t DBZV_VERSION_LOW{1};
static constexpr uint8_t COMPRESSION_ZLIB{1};

// Largest motion-vector component (in pixels). The same value is used as the
// block size and as the width of the border around the frame buffers.
static constexpr unsigned MAX_VECTOR{16};
static constexpr unsigned BLOCK_WIDTH{MAX_VECTOR};
static constexpr unsigned BLOCK_HEIGHT{MAX_VECTOR};

// Bit set in the first output byte of a key frame.
static constexpr unsigned FLAG_KEYFRAME{0x01};
32
// A motion vector: displacement (in pixels) of a block relative to its
// position in the previous frame.
struct CodecVector {
	int8_t x; // horizontal displacement
	int8_t y; // vertical displacement
};
37
38static constexpr unsigned VECTOR_TAB_SIZE =
39 1 + // center
40 8 * MAX_VECTOR + // horizontal, vertical, diagonal
41 MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
42
43static constexpr auto vectorTable = [] {
44 std::array<CodecVector, VECTOR_TAB_SIZE> result = {};
45
46 unsigned p = 0;
47 // center
48 result[p] = {0, 0};
49 p += 1;
50 // horizontal, vertical, diagonal
51 for (int i = 1; i <= int(MAX_VECTOR); ++i, p += 8) {
52 result[p + 0] = {int8_t( i), int8_t( 0)};
53 result[p + 1] = {int8_t(-i), int8_t( 0)};
54 result[p + 2] = {int8_t( 0), int8_t( i)};
55 result[p + 3] = {int8_t( 0), int8_t(-i)};
56 result[p + 4] = {int8_t( i), int8_t( i)};
57 result[p + 5] = {int8_t(-i), int8_t( i)};
58 result[p + 6] = {int8_t( i), int8_t(-i)};
59 result[p + 7] = {int8_t(-i), int8_t(-i)};
60 }
61 // rest
62 for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
63 for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
64 if (x == y) continue; // already have diagonal
65 result[p + 0] = {int8_t( x), int8_t( y)};
66 result[p + 1] = {int8_t(-x), int8_t( y)};
67 result[p + 2] = {int8_t( x), int8_t(-y)};
68 result[p + 3] = {int8_t(-x), int8_t(-y)};
69 p += 4;
70 }
71 }
72 assert(p == VECTOR_TAB_SIZE);
73
74 // sort
75 auto compare = [](const CodecVector& l, const CodecVector& r) {
76 auto cost = [](const CodecVector& v) {
77 auto c = cstd::sqrt(double(v.x * v.x + v.y * v.y));
78 if ((v.x == 0) || (v.y == 0)) {
79 // no penalty for purely horizontal/vertical offset
80 c *= 1.0;
81 } else if (cstd::abs(v.x) == cstd::abs(v.y)) {
82 // small penalty for pure diagonal
83 c *= 2.0;
84 } else {
85 // bigger penalty for 'random' direction
86 c *= 4.0;
87 }
88 return c;
89 };
90 return std::tuple(cost(l), l.x, l.y) <
91 std::tuple(cost(r), r.x, r.y);
92 };
93 ranges::sort(result, compare);
94
95 return result;
96}();
97
// Header that compressFrame() writes directly after the flags byte of every
// key frame. All fields are single bytes, so the struct has no padding.
struct KeyframeHeader {
	uint8_t high_version; // DBZV_VERSION_HIGH
	uint8_t low_version;  // DBZV_VERSION_LOW
	uint8_t compression;  // COMPRESSION_ZLIB
	uint8_t format;       // pixel format identifier
	uint8_t blockWidth;   // BLOCK_WIDTH
	uint8_t blockHeight;  // BLOCK_HEIGHT
};
106
107
108static inline void writePixel(
109 unsigned pixel, Endian::L32& dest)
110{
111 PixelOperations pixelOps;
112 unsigned r = pixelOps.red(pixel);
113 unsigned g = pixelOps.green(pixel);
114 unsigned b = pixelOps.blue(pixel);
115 dest = (r << 16) | (g << 8) | b;
116}
117
118
119ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_)
120 : width(width_)
121 , height(height_)
122{
123 setupBuffers();
124 memset(&zstream, 0, sizeof(zstream));
125 deflateInit(&zstream, 6); // compression level
126
127 // I did a small test: compression level vs compression speed
128 // (recorded Space Manbow intro, video only)
129 //
130 // level | time | size
131 // ------+--------+----------
132 // 0 | 1m12.6 | 139442594
133 // 1 | 1m12.1 | 5217288
134 // 2 | 1m10.8 | 4887258
135 // 3 | 1m11.8 | 4610668
136 // 4 | 1m13.1 | 3791932 <-- old default
137 // 5 | 1m14.2 | 3602078
138 // 6 | 1m14.5 | 3363766 <-- current default
139 // 7 | 1m15.8 | 3333938
140 // 8 | 1m25.0 | 3301168
141 // 9 | 2m04.1 | 3253706
142 //
143 // Level 6 seems a good compromise between size/speed for THIS test.
144}
145
146void ZMBVEncoder::setupBuffers()
147{
148 static constexpr size_t pixelSize = sizeof(Pixel);
149
150 pitch = width + 2 * MAX_VECTOR;
151 auto bufSize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
152
153 oldFrame.resize(bufSize);
154 newFrame.resize(bufSize);
155 ranges::fill(std::span{oldFrame.data(), bufSize}, 0);
156 ranges::fill(std::span{newFrame.data(), bufSize}, 0);
157 work.resize(bufSize);
158 outputSize = neededSize();
159 output.resize(outputSize);
160
161 assert((width % BLOCK_WIDTH ) == 0);
162 assert((height % BLOCK_HEIGHT) == 0);
163 size_t xBlocks = width / BLOCK_WIDTH;
164 size_t yBlocks = height / BLOCK_HEIGHT;
165 blockOffsets.resize(xBlocks * yBlocks);
166 for (auto y : xrange(yBlocks)) {
167 for (auto x : xrange(xBlocks)) {
168 blockOffsets[y * xBlocks + x] =
169 ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
170 (x * BLOCK_WIDTH) + MAX_VECTOR;
171 }
172 }
173}
174
175unsigned ZMBVEncoder::neededSize() const
176{
177 static constexpr unsigned pixelSize = sizeof(Pixel);
178 unsigned f = pixelSize * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
179 return f + f / 1000;
180}
181
182unsigned ZMBVEncoder::possibleBlock(int vx, int vy, size_t offset)
183{
184 int ret = 0;
185 const auto* pOld = &(std::bit_cast<const Pixel*>(oldFrame.data()))[offset + (vy * pitch) + vx];
186 const auto* pNew = &(std::bit_cast<const Pixel*>(newFrame.data()))[offset];
187 for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
188 for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
189 if (pOld[x] != pNew[x]) ++ret;
190 }
191 pOld += pitch * 4;
192 pNew += pitch * 4;
193 }
194 return ret;
195}
196
197unsigned ZMBVEncoder::compareBlock(int vx, int vy, size_t offset)
198{
199 int ret = 0;
200 const auto* pOld = &(std::bit_cast<const Pixel*>(oldFrame.data()))[offset + (vy * pitch) + vx];
201 const auto* pNew = &(std::bit_cast<const Pixel*>(newFrame.data()))[offset];
202 repeat(BLOCK_HEIGHT, [&] {
203 for (auto x : xrange(BLOCK_WIDTH)) {
204 if (pOld[x] != pNew[x]) ++ret;
205 }
206 pOld += pitch;
207 pNew += pitch;
208 });
209 return ret;
210}
211
212void ZMBVEncoder::addXorBlock(int vx, int vy, size_t offset, unsigned& workUsed)
213{
214 using LE_P = typename Endian::Little<Pixel>::type;
215
216 const auto* pOld = &(std::bit_cast<const Pixel*>(oldFrame.data()))[offset + (vy * pitch) + vx];
217 const auto* pNew = &(std::bit_cast<const Pixel*>(newFrame.data()))[offset];
218 repeat(BLOCK_HEIGHT, [&] {
219 for (auto x : xrange(BLOCK_WIDTH)) {
220 auto pXor = pNew[x] ^ pOld[x];
221 writePixel(pXor, *std::bit_cast<LE_P*>(&work[workUsed]));
222 workUsed += sizeof(Pixel);
223 }
224 pOld += pitch;
225 pNew += pitch;
226 });
227}
228
229void ZMBVEncoder::addXorFrame(unsigned& workUsed)
230{
231 auto* vectors = std::bit_cast<int8_t*>(&work[workUsed]);
232
233 unsigned xBlocks = width / BLOCK_WIDTH;
234 unsigned yBlocks = height / BLOCK_HEIGHT;
235 unsigned blockCount = xBlocks * yBlocks;
236
237 // Align the following xor data on 4 byte boundary
238 workUsed = (workUsed + blockCount * 2 + 3) & ~3;
239
240 int bestVx = 0;
241 int bestVy = 0;
242 for (auto b : xrange(blockCount)) {
243 auto offset = blockOffsets[b];
244 // first try best vector of previous block
245 unsigned bestChange = compareBlock(bestVx, bestVy, offset);
246 if (bestChange >= 4) {
247 int possibles = 64;
248 for (const auto& v : vectorTable) {
249 if (possibleBlock(v.x, v.y, offset) < 4) {
250 if (auto testChange = compareBlock(v.x, v.y, offset);
251 testChange < bestChange) {
252 bestChange = testChange;
253 bestVx = narrow<int>(v.x);
254 bestVy = narrow<int>(v.y);
255 if (bestChange < 4) break;
256 }
257 --possibles;
258 if (possibles == 0) break;
259 }
260 }
261 }
262 vectors[b * 2 + 0] = narrow<int8_t>(bestVx << 1);
263 vectors[b * 2 + 1] = narrow<int8_t>(bestVy << 1);
264 if (bestChange) {
265 vectors[b * 2 + 0] |= 1;
266 addXorBlock(bestVx, bestVy, offset, workUsed);
267 }
268 }
269}
270
271void ZMBVEncoder::addFullFrame(unsigned& workUsed)
272{
273 using LE_P = typename Endian::Little<Pixel>::type;
274 static constexpr size_t pixelSize = sizeof(Pixel);
275
276 auto* readFrame =
277 &newFrame[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
278 repeat(height, [&] {
279 const auto* pixelsIn = std::bit_cast<const Pixel*>(readFrame);
280 auto* pixelsOut = std::bit_cast<LE_P*>(&work[workUsed]);
281 for (auto x : xrange(width)) {
282 writePixel(pixelsIn[x], pixelsOut[x]);
283 }
284 readFrame += pitch * sizeof(Pixel);
285 workUsed += narrow<unsigned>(width * sizeof(Pixel));
286 });
287}
288
289const ZMBVEncoder::Pixel* ZMBVEncoder::getScaledLine(const FrameSource* frame, unsigned y, Pixel* workBuf) const
290{
291 switch (height) {
292 case 240:
293 return frame->getLinePtr320_240(y, std::span<uint32_t, 320>(workBuf, 320)).data();
294 case 480:
295 return frame->getLinePtr640_480(y, std::span<uint32_t, 640>(workBuf, 640)).data();
296 case 720:
297 return frame->getLinePtr960_720(y, std::span<uint32_t, 960>(workBuf, 960)).data();
298 default:
300 }
301}
302
303std::span<const uint8_t> ZMBVEncoder::compressFrame(bool keyFrame, const FrameSource* frame)
304{
305 std::swap(newFrame, oldFrame); // replace oldFrame with newFrame
306
307 // Reset the work buffer
308 unsigned workUsed = 0;
309 unsigned writeDone = 1;
310 uint8_t* writeBuf = output.data();
311
312 output[0] = 0; // first byte contains info about this frame
313 if (keyFrame) {
314 static const uint8_t ZMBV_FORMAT_32BPP = 8;
315
316 output[0] |= FLAG_KEYFRAME;
317 auto* header = std::bit_cast<KeyframeHeader*>(
318 writeBuf + writeDone);
319 header->high_version = DBZV_VERSION_HIGH;
320 header->low_version = DBZV_VERSION_LOW;
321 header->compression = COMPRESSION_ZLIB;
322 header->format = ZMBV_FORMAT_32BPP;
323 header->blockWidth = BLOCK_WIDTH;
324 header->blockHeight = BLOCK_HEIGHT;
325 writeDone += sizeof(KeyframeHeader);
326 deflateReset(&zstream); // restart deflate
327 }
328
329 // copy lines (to add black border)
330 static constexpr size_t pixelSize = sizeof(Pixel);
331 auto linePitch = pitch * pixelSize;
332 auto lineWidth = size_t(width) * pixelSize;
333 uint8_t* dest =
334 &newFrame[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
335 for (auto i : xrange(height)) {
336 const auto* scaled = std::bit_cast<const uint8_t*>(
337 getScaledLine(frame, i, std::bit_cast<Pixel*>(dest)));
338 if (scaled != dest) memcpy(dest, scaled, lineWidth);
339 dest += linePitch;
340 }
341
342 // Add the frame data.
343 if (keyFrame) {
344 // Key frame: full frame data.
345 addFullFrame(workUsed);
346 } else {
347 // Non-key frame: delta frame data.
348 addXorFrame(workUsed);
349 }
350 // Compress the frame data with zlib.
351 zstream.next_in = work.data();
352 zstream.avail_in = workUsed;
353 zstream.total_in = 0;
354
355 zstream.next_out = std::bit_cast<Bytef*>(writeBuf + writeDone);
356 zstream.avail_out = outputSize - writeDone;
357 zstream.total_out = 0;
358 auto r = deflate(&zstream, Z_SYNC_FLUSH);
359 assert(r == Z_OK); (void)r;
360
361 return {output.data(), writeDone + zstream.total_out};
362}
363
364} // namespace openmsx
int g
Interface for getting lines from a video frame.
void resize(size_t size)
Grow or shrink the memory block.
Definition MemBuffer.hh:111
const T * data() const
Returns pointer to the start of the memory buffer.
Definition MemBuffer.hh:81
unsigned green(Pixel p) const
unsigned red(Pixel p) const
Extract RGBA components, each in range [0..255].
unsigned blue(Pixel p) const
ZMBVEncoder(unsigned width, unsigned height)
std::span< const uint8_t > compressFrame(bool keyFrame, const FrameSource *frame)
constexpr double sqrt(double x)
Definition cstd.hh:261
constexpr T abs(T t)
Definition cstd.hh:17
This file implemented 3 utility functions:
Definition Autofire.cc:11
CharacterConverter::Pixel Pixel
constexpr void fill(ForwardRange &&range, const T &value)
Definition ranges.hh:315
constexpr void sort(RandomAccessRange &&range)
Definition ranges.hh:51
#define UNREACHABLE
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
Definition xrange.hh:147
constexpr auto xrange(T e)
Definition xrange.hh:132