openMSX
HQCommon.hh
Go to the documentation of this file.
1#ifndef HQCOMMON_HH
2#define HQCOMMON_HH
3
4#include "FrameSource.hh"
5#include "ScalerOutput.hh"
6#include "LineScalers.hh"
7#include "PixelOperations.hh"
8#include "endian.hh"
9#include "vla.hh"
10#include "xrange.hh"
11#include <algorithm>
12#include <cassert>
13#include <cstdint>
14
15namespace openmsx {
16
17template<std::unsigned_integral Pixel>
18[[nodiscard]] inline uint32_t readPixel(Pixel p)
19{
20 // TODO: Use surface info instead.
21 if constexpr (sizeof(Pixel) == 2) {
22 return ((p & 0xF800) << 8) |
23 ((p & 0x07C0) << 5) | // drop lowest green bit
24 ((p & 0x001F) << 3);
25 } else {
26 return p & 0xF8F8F8F8;
27 }
28}
29
30template<std::unsigned_integral Pixel>
31inline Pixel writePixel(uint32_t p)
32{
33 // TODO: Use surface info instead.
34 if constexpr (sizeof(Pixel) == 2) {
35 return ((p & 0xF80000) >> 8) |
36 ((p & 0x00FC00) >> 5) |
37 ((p & 0x0000F8) >> 3);
38 } else {
39 return (p & 0xF8F8F8F8) | ((p & 0xE0E0E0E0) >> 5);
40 }
41}
42
/** Edge predicate for the HQ scalers.
  *
  * Two colors form an 'edge' when they are not identical and at least one of
  * three weighted channel differences (named dy, du and dv below) falls
  * outside its allowed range. The position of the R, G and B bytes inside
  * the 32-bit color is configured at construction time.
  */
class EdgeHQ
{
public:
	/** @param shiftR_ Bit position of the (8-bit) red   channel.
	  * @param shiftG_ Bit position of the (8-bit) green channel.
	  * @param shiftB_ Bit position of the (8-bit) blue  channel.
	  */
	EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
		: shiftR(shiftR_), shiftG(shiftG_), shiftB(shiftB_)
	{
	}

	/** @return true iff c1 and c2 are considered sufficiently different. */
	[[nodiscard]] bool operator()(uint32_t c1, uint32_t c2) const
	{
		// Identical colors can never form an edge.
		if (c1 == c2) return false;

		// Signed per-channel differences, each in range [-255, 255].
		const int dr = channel(c1, shiftR) - channel(c2, shiftR);
		const int dg = channel(c1, shiftG) - channel(c2, shiftG);
		const int db = channel(c1, shiftB) - channel(c2, shiftB);

		const int dy = dr + dg + db;
		const int du = dr - db;
		const int dv = 3 * dg - dy; // == 2*dg - dr - db

		auto outside = [](int value, int limit) {
			return (value < -limit) || (value > limit);
		};
		return outside(dy, 0xC0) || outside(du, 0x1C) || outside(dv, 0x30);
	}

private:
	/** Extract one 8-bit channel as a signed value. */
	[[nodiscard]] static int channel(uint32_t color, unsigned shift)
	{
		return static_cast<int>((color >> shift) & 0xFF);
	}

private:
	const unsigned shiftR;
	const unsigned shiftG;
	const unsigned shiftB;
};
83
84template<std::unsigned_integral Pixel>
86{
87 if constexpr (sizeof(Pixel) == 2) {
88 return EdgeHQ(0, 8, 16);
89 } else {
90 return EdgeHQ(pixelOps.getRshift(),
91 pixelOps.getGshift(),
92 pixelOps.getBshift());
93 }
94}
95
/** Simplified edge predicate: two colors form an edge whenever they are not
  * exactly equal.
  */
// NOTE(review): the declaration line of this type was missing from the
// listing; the name 'EdgeHQLite' is reconstructed -- confirm against callers.
struct EdgeHQLite
{
	/** @return true iff c1 and c2 differ. */
	[[nodiscard]] bool operator()(uint32_t c1, uint32_t c2) const
	{
		return c1 != c2;
	}
};
103
104template<typename EdgeOp>
105void calcEdgesGL(const uint32_t* __restrict curr, const uint32_t* __restrict next,
106 Endian::L32* __restrict edges2, EdgeOp edgeOp)
107{
108 // Consider a grid of 3x3 pixels, numbered like this:
109 // 1 | 2 | 3
110 // ---A---B---
111 // 4 | 5 | 6
112 // ---C---D---
113 // 7 | 8 | 9
114 // Then we calculate 12 'edges':
115 // * 8 star-edges, from the central pixel '5' to the 8 neighbouring pixels.
116 // Let's call these edges 1, 2, 3, 4, 6, 7, 8, 9 (note: 5 is skipped).
117 // * 4 cross-edges, between pixels (2,4), (2,6), (4,8), (6,8).
118 // Let's call these respectively A, B, C, D.
119 // An edge between two pixels means the color of the two pixels is sufficiently distant.
120 // * For the HQ scaler see 'EdgeHQ' for the definition of this distance function.
121 // * The HQlite scaler uses a much simpler distance function.
122 //
123 // We store these 12 edges in a 16-bit value and order them like this:
124 // (MSB (bit 15) on the left, LSB (bit 0) on the left, 'x' means bit is not used)
125 // || B 3 6 9 | D 2 x x || 8 1 A 4 | C 7 x x ||
126 // This order has two important properties:
127 // * The 12 bits are split in 2 groups of 6 bits and each group is MSB
128 // aligned within a byte. This allows to upload this data as a
129 // openGL texture and interpret each texel as a vec2 which
130 // represents a texture coordinate in another 64x64 texture.
131 // * This order allows to calculate the edges incrementally:
132 // Suppose we already calculated the edges for the pixel immediate
133 // above and immediately to the left of the current pixel. Then the edges
134 // (1, 2, 3, A, B) can be calculated as: (upper << 3) & 0xc460
135 // and (4, 7, C) can be calculated as: (left >> 9) & 0x001C
136 // And only edges (6, 8, 9, D) must be newly calculated for this pixel.
137 // So only 4 new edges per pixel instead of all 12.
138 //
139 // This function takes as input:
140 // * an in/out-array 'edges2':
141 // This contains the edge information for the upper row of pixels.
142 // And it gets update in-place to the edge information of the current
143 // row of pixels.
144 // * 2 rows of input pixels: the middle and the lower pixel rows.
145 // * An edge-function (to distinguish 'hq' from 'hqlite').
146
147 using Pixel = uint32_t;
148
149 uint32_t pattern = 0;
150 Pixel c5 = curr[0];
151 Pixel c8 = next[0];
152 if (edgeOp(c5, c8)) pattern |= 0x1800'0000; // edges: 9,D (right pixel)
153
154 for (auto xx : xrange((320 - 2) / 2)) {
155 pattern = (pattern >> (16 + 9)) & 0x001C; // edges: 6,D,9 -> 4,7,C (left pixel)
156 pattern |= (edges2[xx] << 3) & 0xC460'C460; // edges C,8,D,7,9 -> 1,2,3,A,B (left and right)
157
158 if (edgeOp(c5, c8)) pattern |= 0x0000'0080; // edge: 8 (left)
159 Pixel c6 = curr[2 * xx + 1];
160 if (edgeOp(c6, c8)) pattern |= 0x0004'0800; // edge: D (left), 7 (right)
161 if (edgeOp(c5, c6)) pattern |= 0x0010'2000; // edge: 6 (left), 4 (right)
162 Pixel c9 = next[2 * xx + 1];
163 if (edgeOp(c5, c9)) pattern |= 0x0008'1000; // edge: 9 (left), C (right)
164
165 if (edgeOp(c6, c9)) pattern |= 0x0080'0000; // edge: 8 (right)
166 c5 = curr[2 * xx + 2];
167 if (edgeOp(c5, c9)) pattern |= 0x0800'0000; // edge: D (right)
168 if (edgeOp(c6, c5)) pattern |= 0x2000'0000; // edge: 6 (right)
169 c8 = next[2 * xx + 2];
170 if (edgeOp(c6, c8)) pattern |= 0x1000'0000; // edge: 9 (right)
171
172 edges2[xx] = pattern;
173 }
174
175 pattern = (pattern >> (16 + 9)) & 0x001C; // edges: 6,D,9 -> 4,7,C (left pixel)
176 pattern |= (edges2[159] << 3) & 0xC460'C460; // edges: C,8,D,7,9 -> 1,2,3,A,B (left and right)
177
178 if (edgeOp(c5, c8)) pattern |= 0x0000'0080; // edge: 8 (left)
179 Pixel c6 = curr[319];
180 if (edgeOp(c6, c8)) pattern |= 0x0004'0800; // edge: D (left), 7 (right)
181 if (edgeOp(c5, c6)) pattern |= 0x0010'2000; // edge: 6 (left), 4 (right)
182 Pixel c9 = next[319];
183 if (edgeOp(c5, c9)) pattern |= 0x0008'1000; // edge: 9 (left), C (right)
184
185 if (edgeOp(c6, c9)) pattern |= 0x1880'0000; // edges: 8,9,D (right)
186
187 edges2[159] = pattern;
188}
189
190template<std::unsigned_integral Pixel, typename EdgeOp>
192 const Pixel* __restrict srcPrev, const Pixel* __restrict srcCurr,
193 unsigned srcWidth, unsigned* __restrict edgeBuf, EdgeOp edgeOp)
194{
195 unsigned x = 0;
196 uint32_t c1 = readPixel(srcPrev[x]);
197 uint32_t c2 = readPixel(srcCurr[x]);
198 unsigned pattern = edgeOp(c1, c2) ? ((1 << 6) | (1 << 7)) : 0;
199 for (/* */; x < (srcWidth - 1); ++x) {
200 pattern >>= 6;
201 uint32_t n1 = readPixel(srcPrev[x + 1]);
202 uint32_t n2 = readPixel(srcCurr[x + 1]);
203 if (edgeOp(c1, c2)) pattern |= (1 << 5);
204 if (edgeOp(c1, n2)) pattern |= (1 << 6);
205 if (edgeOp(c2, n1)) pattern |= (1 << 7);
206 edgeBuf[x] = pattern;
207 c1 = n1; c2 = n2;
208 }
209 pattern >>= 6;
210 if (edgeOp(c1, c2)) pattern |= (1 << 5) | (1 << 6) | (1 << 7);
211 edgeBuf[x] = pattern;
212}
213
214template<std::unsigned_integral Pixel, typename HQScale, typename EdgeOp>
215void doHQScale2(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
216 FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
217 ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
218{
219 VLA(unsigned, edgeBuf, srcWidth);
220 VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
221 VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
222 VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
223 VLA_SSE_ALIGNED(Pixel, bufA, 2 * srcWidth);
224 VLA_SSE_ALIGNED(Pixel, bufB, 2 * srcWidth);
225
226 int srcY = srcStartY;
227 auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
228 auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
229
230 calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
231
232 bool isCopy = postScale.isCopy();
233 for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
234 auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
235 auto* dst0 = dst.acquireLine(dstY + 0);
236 auto* dst1 = dst.acquireLine(dstY + 1);
237 if (isCopy) {
238 hqScale(srcPrev, srcCurr, srcNext, dst0, dst1,
239 srcWidth, edgeBuf, edgeOp);
240 } else {
241 hqScale(srcPrev, srcCurr, srcNext, bufA, bufB,
242 srcWidth, edgeBuf, edgeOp);
243 postScale(bufA, dst0, dstWidth);
244 postScale(bufB, dst1, dstWidth);
245 }
246 dst.releaseLine(dstY + 0, dst0);
247 dst.releaseLine(dstY + 1, dst1);
248 srcPrev = srcCurr;
249 srcCurr = srcNext;
250 std::swap(buf1, buf2);
251 std::swap(buf2, buf3);
252 }
253}
254
255template<std::unsigned_integral Pixel, typename HQScale, typename EdgeOp>
256void doHQScale3(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
257 FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
258 ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
259{
260 VLA(unsigned, edgeBuf, srcWidth);
261 VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
262 VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
263 VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
264 VLA_SSE_ALIGNED(Pixel, bufA, 3 * srcWidth);
265 VLA_SSE_ALIGNED(Pixel, bufB, 3 * srcWidth);
266 VLA_SSE_ALIGNED(Pixel, bufC, 3 * srcWidth);
267
268 int srcY = srcStartY;
269 auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
270 auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
271
272 calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
273
274 bool isCopy = postScale.isCopy();
275 for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 3) {
276 auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
277 auto* dst0 = dst.acquireLine(dstY + 0);
278 auto* dst1 = dst.acquireLine(dstY + 1);
279 auto* dst2 = dst.acquireLine(dstY + 2);
280 if (isCopy) {
281 hqScale(srcPrev, srcCurr, srcNext, dst0, dst1, dst2,
282 srcWidth, edgeBuf, edgeOp);
283 } else {
284 hqScale(srcPrev, srcCurr, srcNext, bufA, bufB, bufC,
285 srcWidth, edgeBuf, edgeOp);
286 postScale(bufA, dst0, dstWidth);
287 postScale(bufB, dst1, dstWidth);
288 postScale(bufC, dst2, dstWidth);
289 }
290 dst.releaseLine(dstY + 0, dst0);
291 dst.releaseLine(dstY + 1, dst1);
292 dst.releaseLine(dstY + 2, dst2);
293 srcPrev = srcCurr;
294 srcCurr = srcNext;
295 std::swap(buf1, buf2);
296 std::swap(buf2, buf3);
297 }
298}
299
300} // namespace openmsx
301
302#endif
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:51
EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
Definition: HQCommon.hh:46
Interface for getting lines from a video frame.
Definition: FrameSource.hh:17
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:93
Polymorphic line scaler.
Definition: LineScalers.hh:285
virtual bool isCopy() const =0
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
virtual Pixel * acquireLine(unsigned y)=0
virtual void releaseLine(unsigned y, Pixel *buf)=0
This file implements 3 utility functions:
Definition: Autofire.cc:9
uint32_t Pixel
void calcInitialEdges(const Pixel *srcPrev, const Pixel *srcCurr, unsigned srcWidth, unsigned *edgeBuf, EdgeOp edgeOp)
Definition: HQCommon.hh:191
EdgeHQ createEdgeHQ(const PixelOperations< Pixel > &pixelOps)
Definition: HQCommon.hh:85
Pixel writePixel(uint32_t p)
Definition: HQCommon.hh:31
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:127
void doHQScale3(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler< Pixel > &postScale, FrameSource &src, unsigned srcStartY, unsigned, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
Definition: HQCommon.hh:256
uint32_t readPixel(Pixel p)
Definition: HQCommon.hh:18
void calcEdgesGL(const uint32_t *curr, const uint32_t *next, Endian::L32 *edges2, EdgeOp edgeOp)
Definition: HQCommon.hh:105
void doHQScale2(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler< Pixel > &postScale, FrameSource &src, unsigned srcStartY, unsigned, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
Definition: HQCommon.hh:215
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:98
#define VLA(TYPE, NAME, LENGTH)
Definition: vla.hh:10
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
constexpr auto xrange(T e)
Definition: xrange.hh:133