openMSX
HQCommon.hh
Go to the documentation of this file.
1 #ifndef HQCOMMON_HH
2 #define HQCOMMON_HH
3 
4 #include "FrameSource.hh"
5 #include "ScalerOutput.hh"
6 #include "LineScalers.hh"
7 #include "PixelOperations.hh"
8 #include "endian.hh"
9 #include "vla.hh"
10 #include "build-info.hh"
11 #include <algorithm>
12 #include <cassert>
13 #include <cstdint>
14 
15 namespace openmsx {
16 
/**
 * Convert a pixel to the 32-bit working value used by the edge operators.
 *
 * For 2-byte pixel types the three bit fields (masks 0xF800, 0x07C0 and
 * 0x001F) are moved to bits 23..19, 15..11 and 7..3 of the result. For all
 * other pixel sizes the value is masked so that only the upper 5 bits of
 * each byte remain.
 *
 * @param p The pixel value to convert.
 * @return The converted 32-bit value.
 */
template <typename Pixel>
static inline uint32_t readPixel(Pixel p)
{
	// TODO: Use surface info instead.
	if (sizeof(Pixel) != 2) {
		return p & 0xF8F8F8F8;
	}
	const uint32_t upper  = (p & 0xF800) << 8;
	const uint32_t middle = (p & 0x07C0) << 5; // drop lowest green bit
	const uint32_t lower  = (p & 0x001F) << 3;
	return upper | middle | lower;
}
29 
/**
 * Convert a 32-bit working value back to a pixel of type Pixel.
 *
 * For 2-byte pixel types the bit fields selected by the masks 0xF80000,
 * 0x00FC00 and 0x0000F8 are shifted down and combined into a 16-bit value.
 * For all other pixel sizes the upper 5 bits of each byte are kept and the
 * upper 3 bits of each byte are copied into the lower 3 bits of that byte.
 *
 * @param p The 32-bit value to convert.
 * @return The resulting pixel value.
 */
template <typename Pixel>
static inline Pixel writePixel(uint32_t p)
{
	// TODO: Use surface info instead.
	if (sizeof(Pixel) != 2) {
		const uint32_t topFive  = p & 0xF8F8F8F8;
		const uint32_t topThree = p & 0xE0E0E0E0;
		return topFive | (topThree >> 5);
	}
	const uint32_t upper  = (p & 0xF80000) >> 8;
	const uint32_t middle = (p & 0x00FC00) >> 5;
	const uint32_t lower  = (p & 0x0000F8) >> 3;
	return upper | middle | lower;
}
42 
/**
 * Edge operator for the HQ scalers.
 *
 * Decides whether two 32-bit colour values differ enough to count as an
 * edge. The three 8-bit colour channels are extracted using the shift
 * amounts passed to the constructor.
 */
class EdgeHQ
{
public:
	/**
	 * @param shiftR_ Right-shift that moves the red channel to bits 7..0.
	 * @param shiftG_ Right-shift that moves the green channel to bits 7..0.
	 * @param shiftB_ Right-shift that moves the blue channel to bits 7..0.
	 */
	EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
		: shiftR(shiftR_), shiftG(shiftG_), shiftB(shiftB_)
	{
	}

	/**
	 * @return true when at least one of the three combined channel
	 *         differences (dy, du, dv) lies outside its threshold,
	 *         false otherwise (always false for identical inputs).
	 */
	inline bool operator()(uint32_t c1, uint32_t c2) const
	{
		// Identical values can never form an edge.
		if (c1 == c2) return false;

		// Signed per-channel differences, each in the range [-255, 255].
		const int dr = channelDelta(c1, c2, shiftR);
		const int dg = channelDelta(c1, c2, shiftG);
		const int db = channelDelta(c1, c2, shiftB);

		const int dy = dr + dg + db;
		if (exceeds(dy, 0xC0)) return true;

		const int du = dr - db;
		if (exceeds(du, 0x1C)) return true;

		const int dv = 3 * dg - dy; // equals 2*dg - dr - db
		return exceeds(dv, 0x30);
	}
private:
	// Difference between the 8-bit channels found at 'shift' in c1 and c2.
	static int channelDelta(uint32_t c1, uint32_t c2, unsigned shift)
	{
		return int((c1 >> shift) & 0xFF) - int((c2 >> shift) & 0xFF);
	}

	// True when 'value' lies strictly outside [-limit, limit].
	static bool exceeds(int value, int limit)
	{
		return value < -limit || value > limit;
	}

	const unsigned shiftR;
	const unsigned shiftG;
	const unsigned shiftB;
};
83 
84 template<typename Pixel>
86 {
87  if (sizeof(Pixel) == 2) {
88  return EdgeHQ(0, 8, 16);
89  } else {
90  return EdgeHQ(pixelOps.getRshift(),
91  pixelOps.getGshift(),
92  pixelOps.getBshift());
93  }
94 }
95 
/**
 * Edge operator for the HQlite scalers: two colour values form an edge
 * exactly when they are not equal.
 */
struct EdgeHQLite
{
	/** @return true when c1 and c2 differ, false when they are equal. */
	inline bool operator()(uint32_t c1, uint32_t c2) const
	{
		return !(c1 == c2);
	}
};
103 
// Update the packed edge information of one pixel row in-place, going from
// the row above ('edges2' on entry) to the current row ('edges2' on exit).
// See the comment at the start of the function body for the bit layout.
104 template <typename EdgeOp>
105 void calcEdgesGL(const uint32_t* __restrict curr, const uint32_t* __restrict next,
106  Endian::L32* __restrict edges2, EdgeOp edgeOp)
107 {
108  // Consider a grid of 3x3 pixels, numbered like this:
109  // 1 | 2 | 3
110  // ---A---B---
111  // 4 | 5 | 6
112  // ---C---D---
113  // 7 | 8 | 9
114  // Then we calculate 12 'edges':
115  // * 8 star-edges, from the central pixel '5' to the 8 neighbouring pixels.
116  // Let's call these edges 1, 2, 3, 4, 6, 7, 8, 9 (note: 5 is skipped).
117  // * 4 cross-edges, between pixels (2,4), (2,6), (4,8), (6,8).
118  // Let's call these respectively A, B, C, D.
119  // An edge between two pixels means the color of the two pixels is sufficiently distant.
120  // * For the HQ scaler see 'EdgeHQ' for the definition of this distance function.
121  // * The HQlite scaler uses a much simpler distance function.
122  //
123  // We store these 12 edges in a 16-bit value and order them like this:
124  // (MSB (bit 15) on the left, LSB (bit 0) on the right, 'x' means bit is not used)
125  // || B 3 6 9 | D 2 x x || 8 1 A 4 | C 7 x x ||
126  // This order has two important properties:
127  // * The 12 bits are split in 2 groups of 6 bits and each group is MSB
128  // aligned within a byte. This allows to upload this data as an
129  // OpenGL texture and interpret each texel as a vec2 which
130  // represents a texture coordinate in another 64x64 texture.
131  // * This order allows to calculate the edges incrementally:
132  // Suppose we already calculated the edges for the pixel immediately
133  // above and immediately to the left of the current pixel. Then the edges
134  // (1, 2, 3, A, B) can be calculated as: (upper << 3) & 0xc460
135  // and (4, 7, C) can be calculated as: (left >> 9) & 0x001C
136  // And only edges (6, 8, 9, D) must be newly calculated for this pixel.
137  // So only 4 new edges per pixel instead of all 12.
138  //
139  // This function takes as input:
140  // * an in/out-array 'edges2':
141  // This contains the edge information for the upper row of pixels.
142  // And it gets updated in-place to the edge information of the current
143  // row of pixels.
144  // * 2 rows of input pixels: the middle and the lower pixel rows.
145  // * An edge-function (to distinguish 'hq' from 'hqlite').
 //
 // Implementation notes:
 // * Each 32-bit element of 'edges2' holds the 16-bit patterns of two
 //   horizontally adjacent pixels: the left pixel in the lower 16 bits
 //   and the right pixel in the upper 16 bits.
 // * The row width is hard-coded: 'curr' and 'next' are read at indices
 //   0..319 and 'edges2' is accessed at indices 0..159.
146 
147  using Pixel = uint32_t;
148 
 // Seed 'pattern' as if there were a pixel pair to the left of the row,
 // so the first loop iteration can derive its (4, 7, C) bits from it.
149  uint32_t pattern = 0;
150  Pixel c5 = curr[0];
151  Pixel c8 = next[0];
152  if (edgeOp(c5, c8)) pattern |= 0x1800'0000; // edges: 9,D (right pixel)
153 
 // 159 iterations, each handling the pixel pair (2*xx, 2*xx+1). The last
 // pair (318, 319) is handled after the loop because it has no pixel to
 // its right.
154  for (unsigned xx = 0; xx < (320 - 2) / 2; ++xx) {
155  pattern = (pattern >> (16 + 9)) & 0x001C; // edges: 6,D,9 -> 4,7,C (left pixel)
156  pattern |= (edges2[xx] << 3) & 0xC460'C460; // edges C,8,D,7,9 -> 1,2,3,A,B (left and right)
157 
158  if (edgeOp(c5, c8)) pattern |= 0x0000'0080; // edge: 8 (left)
159  Pixel c6 = curr[2 * xx + 1];
160  if (edgeOp(c6, c8)) pattern |= 0x0004'0800; // edge: D (left), 7 (right)
161  if (edgeOp(c5, c6)) pattern |= 0x0010'2000; // edge: 6 (left), 4 (right)
162  Pixel c9 = next[2 * xx + 1];
163  if (edgeOp(c5, c9)) pattern |= 0x0008'1000; // edge: 9 (left), C (right)
164 
165  if (edgeOp(c6, c9)) pattern |= 0x0080'0000; // edge: 8 (right)
 // From here on c5/c8 refer to the left pixel of the NEXT pair.
166  c5 = curr[2 * xx + 2];
167  if (edgeOp(c5, c9)) pattern |= 0x0800'0000; // edge: D (right)
168  if (edgeOp(c6, c5)) pattern |= 0x2000'0000; // edge: 6 (right)
169  c8 = next[2 * xx + 2];
170  if (edgeOp(c6, c8)) pattern |= 0x1000'0000; // edge: 9 (right)
171 
172  edges2[xx] = pattern;
173  }
174 
 // Last pixel pair (318, 319): same as the loop body for the left pixel,
 // but for the right pixel a single comparison sets edges 8, 9 and D and
 // edge 6 is never set.
175  pattern = (pattern >> (16 + 9)) & 0x001C; // edges: 6,D,9 -> 4,7,C (left pixel)
176  pattern |= (edges2[159] << 3) & 0xC460'C460; // edges: C,8,D,7,9 -> 1,2,3,A,B (left and right)
177 
178  if (edgeOp(c5, c8)) pattern |= 0x0000'0080; // edge: 8 (left)
179  Pixel c6 = curr[319];
180  if (edgeOp(c6, c8)) pattern |= 0x0004'0800; // edge: D (left), 7 (right)
181  if (edgeOp(c5, c6)) pattern |= 0x0010'2000; // edge: 6 (left), 4 (right)
182  Pixel c9 = next[319];
183  if (edgeOp(c5, c9)) pattern |= 0x0008'1000; // edge: 9 (left), C (right)
184 
185  if (edgeOp(c6, c9)) pattern |= 0x1880'0000; // edges: 8,9,D (right)
186 
187  edges2[159] = pattern;
188 }
189 
/**
 * Fill 'edgeBuf' with the initial edge pattern for every pixel column,
 * based on two vertically adjacent source lines.
 *
 * For each column x the stored pattern contains:
 *  - bits 0 and 1: bits 6 and 7 of the pattern of column x-1 (for x == 0:
 *    both set when the two pixels of column 0 form an edge),
 *  - bit 5: edge between srcPrev[x] and srcCurr[x],
 *  - bit 6: edge between srcPrev[x] and srcCurr[x + 1],
 *  - bit 7: edge between srcCurr[x] and srcPrev[x + 1].
 * For the last column, which has no right neighbour, bits 5, 6 and 7 all
 * take the result of the comparison between srcPrev[x] and srcCurr[x].
 *
 * @param srcPrev  Upper source line, at least srcWidth pixels.
 * @param srcCurr  Lower source line, at least srcWidth pixels.
 * @param srcWidth Number of pixels per line. When 0 nothing is read or written.
 * @param edgeBuf  Output buffer, at least srcWidth elements.
 * @param edgeOp   Edge predicate, called with two readPixel() results.
 */
template <typename Pixel, typename EdgeOp>
static void calcInitialEdges(
	const Pixel* __restrict srcPrev, const Pixel* __restrict srcCurr,
	unsigned srcWidth, unsigned* __restrict edgeBuf, EdgeOp edgeOp)
{
	// Guard against an empty line: 'srcWidth - 1' below would wrap around
	// (unsigned arithmetic) and index 0 of all three buffers would be
	// accessed even though the buffers hold no elements.
	if (srcWidth == 0) return;

	unsigned x = 0;
	uint32_t c1 = readPixel(srcPrev[x]);
	uint32_t c2 = readPixel(srcCurr[x]);
	// Seed bits 6 and 7; the first shift below moves them to bits 0 and 1.
	unsigned pattern = edgeOp(c1, c2) ? ((1 << 6) | (1 << 7)) : 0;
	for (/* */; x < (srcWidth - 1); ++x) {
		// Carry bits 6 and 7 of the previous column into bits 0 and 1.
		pattern >>= 6;
		uint32_t n1 = readPixel(srcPrev[x + 1]);
		uint32_t n2 = readPixel(srcCurr[x + 1]);
		if (edgeOp(c1, c2)) pattern |= (1 << 5);
		if (edgeOp(c1, n2)) pattern |= (1 << 6);
		if (edgeOp(c2, n1)) pattern |= (1 << 7);
		edgeBuf[x] = pattern;
		c1 = n1; c2 = n2;
	}
	// Last column: there is no right neighbour, so the vertical comparison
	// decides all three bits.
	pattern >>= 6;
	if (edgeOp(c1, c2)) pattern |= (1 << 5) | (1 << 6) | (1 << 7);
	edgeBuf[x] = pattern;
}
213 
214 template <typename Pixel, typename HQScale, typename EdgeOp>
215 static void doHQScale2(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
216  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
217  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
218 {
219  VLA(unsigned, edgeBuf, srcWidth);
220  VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
221  VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
222  VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
223  VLA_SSE_ALIGNED(Pixel, bufA, 2 * srcWidth);
224  VLA_SSE_ALIGNED(Pixel, bufB, 2 * srcWidth);
225 
226  int srcY = srcStartY;
227  auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
228  auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
229 
230  calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
231 
232  bool isCopy = postScale.isCopy();
233  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
234  auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
235  auto* dst0 = dst.acquireLine(dstY + 0);
236  auto* dst1 = dst.acquireLine(dstY + 1);
237  if (isCopy) {
238  hqScale(srcPrev, srcCurr, srcNext, dst0, dst1,
239  srcWidth, edgeBuf, edgeOp);
240  } else {
241  hqScale(srcPrev, srcCurr, srcNext, bufA, bufB,
242  srcWidth, edgeBuf, edgeOp);
243  postScale(bufA, dst0, dstWidth);
244  postScale(bufB, dst1, dstWidth);
245  }
246  dst.releaseLine(dstY + 0, dst0);
247  dst.releaseLine(dstY + 1, dst1);
248  srcPrev = srcCurr;
249  srcCurr = srcNext;
250  std::swap(buf1, buf2);
251  std::swap(buf2, buf3);
252  }
253 }
254 
255 template <typename Pixel, typename HQScale, typename EdgeOp>
256 static void doHQScale3(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
257  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
258  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
259 {
260  VLA(unsigned, edgeBuf, srcWidth);
261  VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
262  VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
263  VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
264  VLA_SSE_ALIGNED(Pixel, bufA, 3 * srcWidth);
265  VLA_SSE_ALIGNED(Pixel, bufB, 3 * srcWidth);
266  VLA_SSE_ALIGNED(Pixel, bufC, 3 * srcWidth);
267 
268  int srcY = srcStartY;
269  auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
270  auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
271 
272  calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
273 
274  bool isCopy = postScale.isCopy();
275  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 3) {
276  auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
277  auto* dst0 = dst.acquireLine(dstY + 0);
278  auto* dst1 = dst.acquireLine(dstY + 1);
279  auto* dst2 = dst.acquireLine(dstY + 2);
280  if (isCopy) {
281  hqScale(srcPrev, srcCurr, srcNext, dst0, dst1, dst2,
282  srcWidth, edgeBuf, edgeOp);
283  } else {
284  hqScale(srcPrev, srcCurr, srcNext, bufA, bufB, bufC,
285  srcWidth, edgeBuf, edgeOp);
286  postScale(bufA, dst0, dstWidth);
287  postScale(bufB, dst1, dstWidth);
288  postScale(bufC, dst2, dstWidth);
289  }
290  dst.releaseLine(dstY + 0, dst0);
291  dst.releaseLine(dstY + 1, dst1);
292  dst.releaseLine(dstY + 2, dst2);
293  srcPrev = srcCurr;
294  srcCurr = srcNext;
295  std::swap(buf1, buf2);
296  std::swap(buf2, buf3);
297  }
298 }
299 
300 } // namespace openmsx
301 
302 #endif
openmsx::PixelOpBase::getGshift
int getGshift() const
Definition: PixelOperations.hh:27
openmsx::EdgeHQ
Definition: HQCommon.hh:44
openmsx::calcEdgesGL
void calcEdgesGL(const uint32_t *curr, const uint32_t *next, Endian::L32 *edges2, EdgeOp edgeOp)
Definition: HQCommon.hh:105
openmsx::PixelOpBase::getBshift
int getBshift() const
Definition: PixelOperations.hh:28
openmsx::PixelOperations
Definition: PixelOperations.hh:143
openmsx::EdgeHQLite::operator()
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:98
FrameSource.hh
vla.hh
openmsx::EdgeHQLite
Definition: HQCommon.hh:97
openmsx::Pixel
uint32_t Pixel
Definition: GLHQLiteScaler.cc:98
openmsx::PixelOpBase::getRshift
int getRshift() const
Definition: PixelOperations.hh:26
ScalerOutput.hh
openmsx::EdgeHQ::EdgeHQ
EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
Definition: HQCommon.hh:46
build-info.hh
endian.hh
Endian::EndianT
Definition: endian.hh:71
PixelOperations.hh
LineScalers.hh
openmsx::createEdgeHQ
EdgeHQ createEdgeHQ(const PixelOperations< Pixel > &pixelOps)
Definition: HQCommon.hh:85
openmsx::x
constexpr KeyMatrixPosition x
Keyboard bindings.
Definition: Keyboard.cc:1419
std::swap
void swap(openmsx::MemBuffer< T > &l, openmsx::MemBuffer< T > &r) noexcept
Definition: MemBuffer.hh:202
openmsx::EdgeHQ::operator()
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:51
VLA_SSE_ALIGNED
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44
VLA
#define VLA(TYPE, NAME, LENGTH)
Definition: vla.hh:10
openmsx
This file implemented 3 utility functions:
Definition: Autofire.cc:5