openMSX
HQCommon.hh
Go to the documentation of this file.
1 #ifndef HQCOMMON_HH
2 #define HQCOMMON_HH
3 
4 #include "FrameSource.hh"
5 #include "ScalerOutput.hh"
6 #include "LineScalers.hh"
7 #include "PixelOperations.hh"
8 #include "endian.hh"
9 #include "vla.hh"
10 #include "xrange.hh"
11 #include "build-info.hh"
12 #include <algorithm>
13 #include <cassert>
14 #include <cstdint>
15 
16 namespace openmsx {
17 
/** Convert a native pixel into the 32-bit working value used for edge
  * detection in this file.
  *
  * - 2-byte pixels: the three bit-fields 0xF800 / 0x07C0 / 0x001F are moved
  *   so that each one ends up in the top 5 bits of its own byte
  *   (0xF80000 / 0x00F800 / 0x0000F8). The lowest bit of the green field is
  *   dropped, so all three components keep 5 significant bits.
  * - Any other pixel size: the low 3 bits of every byte are cleared.
  */
template<typename Pixel>
[[nodiscard]] inline uint32_t readPixel(Pixel p)
{
	// TODO: Use surface info instead.
	if constexpr (sizeof(Pixel) != 2) {
		return p & 0xF8F8F8F8;
	} else {
		const uint32_t wide = p;
		const uint32_t upper  = (wide & 0xF800) << 8;
		const uint32_t middle = (wide & 0x07C0) << 5; // drop lowest green bit
		const uint32_t lower  = (wide & 0x001F) << 3;
		return upper | middle | lower;
	}
}
30 
/** Convert a 32-bit working value back into a native pixel.
  *
  * - 2-byte pixels: the fields 0xF80000 / 0x00FC00 / 0x0000F8 are packed
  *   into 0xF800 / 0x07E0 / 0x001F (5, 6 and 5 bits respectively).
  * - Any other pixel size: the top 5 bits of every byte are kept and the
  *   top 3 bits of every byte are additionally copied into the low 3 bits
  *   of that same byte.
  */
template<typename Pixel>
inline Pixel writePixel(uint32_t p)
{
	// TODO: Use surface info instead.
	if constexpr (sizeof(Pixel) != 2) {
		const uint32_t highBits   = p & 0xF8F8F8F8;
		const uint32_t replicated = (p & 0xE0E0E0E0) >> 5;
		return highBits | replicated;
	} else {
		const uint32_t upper  = (p & 0xF80000) >> 8;
		const uint32_t middle = (p & 0x00FC00) >> 5;
		const uint32_t lower  = (p & 0x0000F8) >> 3;
		return upper | middle | lower;
	}
}
43 
/** Edge predicate: decides whether two 32-bit colors are "sufficiently
  * different".
  *
  * The three constructor arguments are the bit positions at which the three
  * 8-bit color components are found inside a color value.
  */
class EdgeHQ
{
public:
	EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
		: shiftR(shiftR_), shiftG(shiftG_), shiftB(shiftB_)
	{
	}

	/** Returns true when c1 and c2 differ by more than the thresholds.
	  *
	  * Three integer combinations of the per-component differences are
	  * checked in turn (the names dy/du/dv suggest a YUV-like metric):
	  *   dy = dr + dg + db    limit 0xC0
	  *   du = dr - db         limit 0x1C
	  *   dv = 3 * dg - dy     limit 0x30
	  * An edge is reported as soon as one of them lies outside
	  * [-limit, +limit]. Identical colors never form an edge.
	  */
	[[nodiscard]] inline bool operator()(uint32_t c1, uint32_t c2) const
	{
		if (c1 == c2) return false;

		// Extract one 8-bit component as a signed value.
		auto component = [](uint32_t color, unsigned shift) {
			return static_cast<int>((color >> shift) & 0xFF);
		};
		// True when 'value' is outside the closed interval [-limit, limit].
		auto exceeds = [](int value, int limit) {
			return (value < -limit) || (limit < value);
		};

		const int dr = component(c1, shiftR) - component(c2, shiftR);
		const int dg = component(c1, shiftG) - component(c2, shiftG);
		const int db = component(c1, shiftB) - component(c2, shiftB);

		const int dy = dr + dg + db;
		const int du = dr - db;
		const int dv = 3 * dg - dy;

		return exceeds(dy, 0xC0) || exceeds(du, 0x1C) || exceeds(dv, 0x30);
	}
private:
	const unsigned shiftR;
	const unsigned shiftG;
	const unsigned shiftB;
};
84 
85 template<typename Pixel>
87 {
88  if constexpr (sizeof(Pixel) == 2) {
89  return EdgeHQ(0, 8, 16);
90  } else {
91  return EdgeHQ(pixelOps.getRshift(),
92  pixelOps.getGshift(),
93  pixelOps.getBshift());
94  }
95 }
96 
/** Edge predicate for the 'lite' variant: two colors form an edge exactly
  * when they are not bit-for-bit equal.
  */
struct EdgeHQLite
{
	[[nodiscard]] inline bool operator()(uint32_t c1, uint32_t c2) const
	{
		const bool identical = (c1 == c2);
		return !identical;
	}
};
104 
// Incrementally compute the packed edge patterns for one row of pixels.
// Each 32-bit element of 'edges2' holds the patterns of two horizontally
// adjacent pixels: the left pixel in the low 16 bits and the right pixel in
// the high 16 bits (see the '(left)' / '(right)' masks in the body).
// The row width is hard-coded: 'curr' and 'next' are indexed up to 319 and
// 'edges2' up to 159. The loop handles the first 159 pixel pairs; the last
// pair is handled after the loop, where edges 8, 9 and D of the right-most
// pixel are all taken from the single comparison of curr[319] with next[319].
// NOTE(review): presumably xrange(n) iterates 0 .. n-1 -- confirm in xrange.hh.
105 template<typename EdgeOp>
106 void calcEdgesGL(const uint32_t* __restrict curr, const uint32_t* __restrict next,
107  Endian::L32* __restrict edges2, EdgeOp edgeOp)
108 {
109  // Consider a grid of 3x3 pixels, numbered like this:
110  // 1 | 2 | 3
111  // ---A---B---
112  // 4 | 5 | 6
113  // ---C---D---
114  // 7 | 8 | 9
115  // Then we calculate 12 'edges':
116  // * 8 star-edges, from the central pixel '5' to the 8 neighbouring pixels.
117  // Let's call these edges 1, 2, 3, 4, 6, 7, 8, 9 (note: 5 is skipped).
118  // * 4 cross-edges, between pixels (2,4), (2,6), (4,8), (6,8).
119  // Let's call these respectively A, B, C, D.
120  // An edge between two pixels means the color of the two pixels is sufficiently distant.
121  // * For the HQ scaler see 'EdgeHQ' for the definition of this distance function.
122  // * The HQlite scaler uses a much simpler distance function.
123  //
124  // We store these 12 edges in a 16-bit value and order them like this:
125  // (MSB (bit 15) on the left, LSB (bit 0) on the right, 'x' means bit is not used)
126  // || B 3 6 9 | D 2 x x || 8 1 A 4 | C 7 x x ||
127  // This order has two important properties:
128  // * The 12 bits are split in 2 groups of 6 bits and each group is MSB
129  // aligned within a byte. This allows uploading this data as an
130  // OpenGL texture and interpreting each texel as a vec2 which
131  // represents a texture coordinate in another 64x64 texture.
132  // * This order allows to calculate the edges incrementally:
133  // Suppose we already calculated the edges for the pixel immediately
134  // above and immediately to the left of the current pixel. Then the edges
135  // (1, 2, 3, A, B) can be calculated as: (upper << 3) & 0xc460
136  // and (4, 7, C) can be calculated as: (left >> 9) & 0x001C
137  // And only edges (6, 8, 9, D) must be newly calculated for this pixel.
138  // So only 4 new edges per pixel instead of all 12.
139  //
140  // This function takes as input:
141  // * an in/out-array 'edges2':
142  // This contains the edge information for the upper row of pixels.
143  // And it gets updated in-place to the edge information of the current
144  // row of pixels.
145  // * 2 rows of input pixels: the middle and the lower pixel rows.
146  // * An edge-function (to distinguish 'hq' from 'hqlite').
147 
148  using Pixel = uint32_t;
149 
150  uint32_t pattern = 0;
151  Pixel c5 = curr[0];
152  Pixel c8 = next[0];
153  if (edgeOp(c5, c8)) pattern |= 0x1800'0000; // edges: 9,D (right pixel)
154 
155  for (auto xx : xrange((320 - 2) / 2)) {
156  pattern = (pattern >> (16 + 9)) & 0x001C; // edges: 6,D,9 -> 4,7,C (left pixel)
157  pattern |= (edges2[xx] << 3) & 0xC460'C460; // edges C,8,D,7,9 -> 1,2,3,A,B (left and right)
158 
159  if (edgeOp(c5, c8)) pattern |= 0x0000'0080; // edge: 8 (left)
160  Pixel c6 = curr[2 * xx + 1];
161  if (edgeOp(c6, c8)) pattern |= 0x0004'0800; // edge: D (left), 7 (right)
162  if (edgeOp(c5, c6)) pattern |= 0x0010'2000; // edge: 6 (left), 4 (right)
163  Pixel c9 = next[2 * xx + 1];
164  if (edgeOp(c5, c9)) pattern |= 0x0008'1000; // edge: 9 (left), C (right)
165 
166  if (edgeOp(c6, c9)) pattern |= 0x0080'0000; // edge: 8 (right)
167  c5 = curr[2 * xx + 2];
168  if (edgeOp(c5, c9)) pattern |= 0x0800'0000; // edge: D (right)
169  if (edgeOp(c6, c5)) pattern |= 0x2000'0000; // edge: 6 (right)
170  c8 = next[2 * xx + 2];
171  if (edgeOp(c6, c8)) pattern |= 0x1000'0000; // edge: 9 (right)
172 
173  edges2[xx] = pattern;
174  }
175 
176  pattern = (pattern >> (16 + 9)) & 0x001C; // edges: 6,D,9 -> 4,7,C (left pixel)
177  pattern |= (edges2[159] << 3) & 0xC460'C460; // edges: C,8,D,7,9 -> 1,2,3,A,B (left and right)
178 
179  if (edgeOp(c5, c8)) pattern |= 0x0000'0080; // edge: 8 (left)
180  Pixel c6 = curr[319];
181  if (edgeOp(c6, c8)) pattern |= 0x0004'0800; // edge: D (left), 7 (right)
182  if (edgeOp(c5, c6)) pattern |= 0x0010'2000; // edge: 6 (left), 4 (right)
183  Pixel c9 = next[319];
184  if (edgeOp(c5, c9)) pattern |= 0x0008'1000; // edge: 9 (left), C (right)
185 
186  if (edgeOp(c6, c9)) pattern |= 0x1880'0000; // edges: 8,9,D (right)
187 
188  edges2[159] = pattern;
189 }
190 
/** Fill 'edgeBuf' with the edge bits between two vertically adjacent rows.
  *
  * Pixels are converted with readPixel() before being compared. For every
  * column x the stored value contains:
  *   bit 5: edgeOp(srcPrev[x], srcCurr[x])
  *   bit 6: edgeOp(srcPrev[x], srcCurr[x + 1])
  *   bit 7: edgeOp(srcCurr[x], srcPrev[x + 1])
  *   bit 0, bit 1: bits 6 and 7 of column x - 1
  * Column 0 has no left neighbour: its bits 0 and 1 are both set from
  * edgeOp(srcPrev[0], srcCurr[0]). The last column has no right neighbour:
  * its bits 5, 6 and 7 are all set from the comparison of its own two pixels.
  *
  * @param srcPrev  Upper row, at least 'srcWidth' pixels.
  * @param srcCurr  Lower row, at least 'srcWidth' pixels.
  * @param srcWidth Number of pixels per row; nothing is done when it is 0.
  * @param edgeBuf  Output, at least 'srcWidth' entries.
  * @param edgeOp   Predicate on two readPixel() results, true means 'edge'.
  */
template<typename Pixel, typename EdgeOp>
void calcInitialEdges(
	const Pixel* __restrict srcPrev, const Pixel* __restrict srcCurr,
	unsigned srcWidth, unsigned* __restrict edgeBuf, EdgeOp edgeOp)
{
	// Without this guard 'srcWidth - 1' below wraps around and element 0
	// of both rows would be read even though the rows are empty.
	if (srcWidth == 0) return;

	unsigned x = 0;
	uint32_t c1 = readPixel(srcPrev[x]);
	uint32_t c2 = readPixel(srcCurr[x]);
	// Seed bits 6 and 7; the first shift below moves them to bits 0 and 1.
	unsigned pattern = edgeOp(c1, c2) ? ((1 << 6) | (1 << 7)) : 0;
	for (/* */; x < (srcWidth - 1); ++x) {
		pattern >>= 6; // keep only the previous column's bits 6,7 (now 0,1)
		uint32_t n1 = readPixel(srcPrev[x + 1]);
		uint32_t n2 = readPixel(srcCurr[x + 1]);
		if (edgeOp(c1, c2)) pattern |= (1 << 5);
		if (edgeOp(c1, n2)) pattern |= (1 << 6);
		if (edgeOp(c2, n1)) pattern |= (1 << 7);
		edgeBuf[x] = pattern;
		c1 = n1; c2 = n2;
	}
	// Last column: there is no column to the right to compare against.
	pattern >>= 6;
	if (edgeOp(c1, c2)) pattern |= (1 << 5) | (1 << 6) | (1 << 7);
	edgeBuf[x] = pattern;
}
214 
215 template<typename Pixel, typename HQScale, typename EdgeOp>
216 void doHQScale2(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
217  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
218  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
219 {
220  VLA(unsigned, edgeBuf, srcWidth);
221  VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
222  VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
223  VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
224  VLA_SSE_ALIGNED(Pixel, bufA, 2 * srcWidth);
225  VLA_SSE_ALIGNED(Pixel, bufB, 2 * srcWidth);
226 
227  int srcY = srcStartY;
228  auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
229  auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
230 
231  calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
232 
233  bool isCopy = postScale.isCopy();
234  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
235  auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
236  auto* dst0 = dst.acquireLine(dstY + 0);
237  auto* dst1 = dst.acquireLine(dstY + 1);
238  if (isCopy) {
239  hqScale(srcPrev, srcCurr, srcNext, dst0, dst1,
240  srcWidth, edgeBuf, edgeOp);
241  } else {
242  hqScale(srcPrev, srcCurr, srcNext, bufA, bufB,
243  srcWidth, edgeBuf, edgeOp);
244  postScale(bufA, dst0, dstWidth);
245  postScale(bufB, dst1, dstWidth);
246  }
247  dst.releaseLine(dstY + 0, dst0);
248  dst.releaseLine(dstY + 1, dst1);
249  srcPrev = srcCurr;
250  srcCurr = srcNext;
251  std::swap(buf1, buf2);
252  std::swap(buf2, buf3);
253  }
254 }
255 
256 template<typename Pixel, typename HQScale, typename EdgeOp>
257 void doHQScale3(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
258  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
259  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
260 {
261  VLA(unsigned, edgeBuf, srcWidth);
262  VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
263  VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
264  VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
265  VLA_SSE_ALIGNED(Pixel, bufA, 3 * srcWidth);
266  VLA_SSE_ALIGNED(Pixel, bufB, 3 * srcWidth);
267  VLA_SSE_ALIGNED(Pixel, bufC, 3 * srcWidth);
268 
269  int srcY = srcStartY;
270  auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
271  auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
272 
273  calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
274 
275  bool isCopy = postScale.isCopy();
276  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 3) {
277  auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
278  auto* dst0 = dst.acquireLine(dstY + 0);
279  auto* dst1 = dst.acquireLine(dstY + 1);
280  auto* dst2 = dst.acquireLine(dstY + 2);
281  if (isCopy) {
282  hqScale(srcPrev, srcCurr, srcNext, dst0, dst1, dst2,
283  srcWidth, edgeBuf, edgeOp);
284  } else {
285  hqScale(srcPrev, srcCurr, srcNext, bufA, bufB, bufC,
286  srcWidth, edgeBuf, edgeOp);
287  postScale(bufA, dst0, dstWidth);
288  postScale(bufB, dst1, dstWidth);
289  postScale(bufC, dst2, dstWidth);
290  }
291  dst.releaseLine(dstY + 0, dst0);
292  dst.releaseLine(dstY + 1, dst1);
293  dst.releaseLine(dstY + 2, dst2);
294  srcPrev = srcCurr;
295  srcCurr = srcNext;
296  std::swap(buf1, buf2);
297  std::swap(buf2, buf3);
298  }
299 }
300 
301 } // namespace openmsx
302 
303 #endif
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:52
EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
Definition: HQCommon.hh:47
Interface for getting lines from a video frame.
Definition: FrameSource.hh:15
const Pixel * getLinePtr(int line, unsigned width, Pixel *buf) const
Gets a pointer to the pixels of the given line number.
Definition: FrameSource.hh:91
Polymorphic line scaler.
Definition: LineScalers.hh:285
virtual bool isCopy() const =0
Is this scale operation actually a copy? This info can be used to (in a multi-step scale operation) i...
virtual Pixel * acquireLine(unsigned y)=0
virtual void releaseLine(unsigned y, Pixel *buf)=0
This file implemented 3 utility functions:
Definition: Autofire.cc:9
uint32_t Pixel
void calcInitialEdges(const Pixel *srcPrev, const Pixel *srcCurr, unsigned srcWidth, unsigned *edgeBuf, EdgeOp edgeOp)
Definition: HQCommon.hh:192
EdgeHQ createEdgeHQ(const PixelOperations< Pixel > &pixelOps)
Definition: HQCommon.hh:86
Pixel writePixel(uint32_t p)
Definition: HQCommon.hh:32
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:124
void doHQScale3(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler< Pixel > &postScale, FrameSource &src, unsigned srcStartY, unsigned, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
Definition: HQCommon.hh:257
uint32_t readPixel(Pixel p)
Definition: HQCommon.hh:19
void calcEdgesGL(const uint32_t *curr, const uint32_t *next, Endian::L32 *edges2, EdgeOp edgeOp)
Definition: HQCommon.hh:106
void doHQScale2(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler< Pixel > &postScale, FrameSource &src, unsigned srcStartY, unsigned, unsigned srcWidth, ScalerOutput< Pixel > &dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
Definition: HQCommon.hh:216
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:99
#define VLA(TYPE, NAME, LENGTH)
Definition: vla.hh:10
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
constexpr auto xrange(T e)
Definition: xrange.hh:155