openMSX
CharacterConverter.cc
Go to the documentation of this file.
1 /*
2 TODO:
3 - Clean up renderGraphics2, it is currently very hard to understand
4  with all the masks and quarters etc.
5 - Correctly implement vertical scroll in text modes.
6  Can be implemented by reordering blitting, but uses a smaller
7  wrap than GFX modes: 8 lines instead of 256 lines.
8 */
9 
10 #include "CharacterConverter.hh"
11 #include "VDP.hh"
12 #include "VDPVRAM.hh"
13 #include "xrange.hh"
14 #include "build-info.hh"
15 #include "components.hh"
16 #include <cstdint>
17 
18 #ifdef __SSE2__
19 #include "emmintrin.h" // SSE2
20 #endif
21 
22 namespace openmsx {
23 
24 template<typename Pixel>
26  VDP& vdp_, const Pixel* palFg_, const Pixel* palBg_)
27  : vdp(vdp_), vram(vdp.getVRAM()), palFg(palFg_), palBg(palBg_)
28 {
29  modeBase = 0; // not strictly needed, but avoids Coverity warning
30 }
31 
32 template<typename Pixel>
34 {
35  modeBase = mode.getBase();
36  assert(modeBase < 0x0C);
37 }
38 
39 template<typename Pixel>
41 {
42  // TODO: Support YJK on modes other than Graphic 6/7.
43  switch (modeBase) {
44  case DisplayMode::GRAPHIC1: // screen 1
45  renderGraphic1(linePtr, line);
46  break;
47  case DisplayMode::TEXT1: // screen 0, width 40
48  renderText1(linePtr, line);
49  break;
50  case DisplayMode::MULTICOLOR: // screen 3
51  renderMulti(linePtr, line);
52  break;
53  case DisplayMode::GRAPHIC2: // screen 2
54  renderGraphic2(linePtr, line);
55  break;
56  case DisplayMode::GRAPHIC3: // screen 4
57  renderGraphic2(linePtr, line); // graphic3, actually
58  break;
59  case DisplayMode::TEXT2: // screen 0, width 80
60  renderText2(linePtr, line);
61  break;
62  case DisplayMode::TEXT1Q: // TMSxxxx only
63  if (vdp.isMSX1VDP()) {
64  renderText1Q(linePtr, line);
65  } else {
66  renderBlank (linePtr);
67  }
68  break;
69  case DisplayMode::MULTIQ: // TMSxxxx only
70  if (vdp.isMSX1VDP()) {
71  renderMultiQ(linePtr, line);
72  } else {
73  renderBlank (linePtr);
74  }
75  break;
76  default: // remaining (non-bitmap) modes
77  if (vdp.isMSX1VDP()) {
78  renderBogus(linePtr);
79  } else {
80  renderBlank(linePtr);
81  }
82  }
83 }
84 
#ifdef __SSE2__
// Copied from Scale2xScaler.cc, TODO move to common location?
// Bitwise select: for every bit, take the bit from 'a1' where 'mask' is 1
// and the bit from 'a0' where 'mask' is 0.
static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
	const __m128i differing = _mm_xor_si128(a0, a1);
	const __m128i toFlip    = _mm_and_si128(differing, mask);
	return _mm_xor_si128(toFlip, a0);
}
#endif
92 
93 template<typename Pixel> static inline void draw6(
94  Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
95 {
96  pixelPtr[0] = (pattern & 0x80) ? fg : bg;
97  pixelPtr[1] = (pattern & 0x40) ? fg : bg;
98  pixelPtr[2] = (pattern & 0x20) ? fg : bg;
99  pixelPtr[3] = (pattern & 0x10) ? fg : bg;
100  pixelPtr[4] = (pattern & 0x08) ? fg : bg;
101  pixelPtr[5] = (pattern & 0x04) ? fg : bg;
102  pixelPtr += 6;
103 }
104 
105 template<typename Pixel> static inline void draw8(
106  Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
107 {
108 #ifdef __SSE2__
109  // SSE2 version, 32bpp (16bpp is possible, but not worth it anymore)
110  if constexpr (sizeof(Pixel) == 4) {
111  const __m128i m74 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
112  const __m128i m30 = _mm_set_epi32(0x01, 0x02, 0x04, 0x08);
113  const __m128i zero = _mm_setzero_si128();
114 
115  __m128i fg4 = _mm_set1_epi32(fg);
116  __m128i bg4 = _mm_set1_epi32(bg);
117  __m128i pat = _mm_set1_epi32(pattern);
118 
119  __m128i b74 = _mm_cmpeq_epi32(_mm_and_si128(pat, m74), zero);
120  __m128i b30 = _mm_cmpeq_epi32(_mm_and_si128(pat, m30), zero);
121 
122  auto* out = reinterpret_cast<__m128i*>(pixelPtr);
123  _mm_storeu_si128(out + 0, select(fg4, bg4, b74));
124  _mm_storeu_si128(out + 1, select(fg4, bg4, b30));
125  pixelPtr += 8;
126  return;
127  }
128 #endif
129 
130  // C++ version
131  pixelPtr[0] = (pattern & 0x80) ? fg : bg;
132  pixelPtr[1] = (pattern & 0x40) ? fg : bg;
133  pixelPtr[2] = (pattern & 0x20) ? fg : bg;
134  pixelPtr[3] = (pattern & 0x10) ? fg : bg;
135  pixelPtr[4] = (pattern & 0x08) ? fg : bg;
136  pixelPtr[5] = (pattern & 0x04) ? fg : bg;
137  pixelPtr[6] = (pattern & 0x02) ? fg : bg;
138  pixelPtr[7] = (pattern & 0x01) ? fg : bg;
139  pixelPtr += 8;
140 }
141 
142 template<typename Pixel>
143 void CharacterConverter<Pixel>::renderText1(
144  Pixel* __restrict pixelPtr, int line)
145 {
146  Pixel fg = palFg[vdp.getForegroundColor()];
147  Pixel bg = palFg[vdp.getBackgroundColor()];
148 
149  // 8 * 256 is small enough to always be contiguous
150  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
151  patternArea += (line + vdp.getVerticalScroll()) & 7;
152 
153  // Note: Because line width is not a power of two, reading an entire line
154  // from a VRAM pointer returned by readArea will not wrap the index
155  // correctly. Therefore we read one character at a time.
156  unsigned nameStart = (line / 8) * 40;
157  unsigned nameEnd = nameStart + 40;
158  for (auto name : xrange(nameStart, nameEnd)) {
159  unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
160  unsigned pattern = patternArea[charcode * 8];
161  draw6(pixelPtr, fg, bg, pattern);
162  }
163 }
164 
165 template<typename Pixel>
166 void CharacterConverter<Pixel>::renderText1Q(
167  Pixel* __restrict pixelPtr, int line)
168 {
169  Pixel fg = palFg[vdp.getForegroundColor()];
170  Pixel bg = palFg[vdp.getBackgroundColor()];
171 
172  unsigned patternBaseLine = (~0u << 13) | ((line + vdp.getVerticalScroll()) & 7);
173 
174  // Note: Because line width is not a power of two, reading an entire line
175  // from a VRAM pointer returned by readArea will not wrap the index
176  // correctly. Therefore we read one character at a time.
177  unsigned nameStart = (line / 8) * 40;
178  unsigned nameEnd = nameStart + 40;
179  unsigned patternQuarter = (line & 0xC0) << 2;
180  for (auto name : xrange(nameStart, nameEnd)) {
181  unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
182  unsigned patternNr = patternQuarter | charcode;
183  unsigned pattern = vram.patternTable.readNP(
184  patternBaseLine | (patternNr * 8));
185  draw6(pixelPtr, fg, bg, pattern);
186  }
187 }
188 
189 template<typename Pixel>
190 void CharacterConverter<Pixel>::renderText2(
191  Pixel* __restrict pixelPtr, int line)
192 {
193  Pixel plainFg = palFg[vdp.getForegroundColor()];
194  Pixel plainBg = palFg[vdp.getBackgroundColor()];
195  Pixel blinkFg, blinkBg;
196  if (vdp.getBlinkState()) {
197  int fg = vdp.getBlinkForegroundColor();
198  blinkFg = palBg[fg ? fg : vdp.getBlinkBackgroundColor()];
199  blinkBg = palBg[vdp.getBlinkBackgroundColor()];
200  } else {
201  blinkFg = plainFg;
202  blinkBg = plainBg;
203  }
204 
205  // 8 * 256 is small enough to always be contiguous
206  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
207  patternArea += (line + vdp.getVerticalScroll()) & 7;
208 
209  unsigned colorStart = (line / 8) * (80 / 8);
210  unsigned nameStart = (line / 8) * 80;
211  for (auto i : xrange(80 / 8)) {
212  unsigned colorPattern = vram.colorTable.readNP(
213  (colorStart + i) | (~0u << 9));
214  const byte* nameArea = vram.nameTable.getReadArea(
215  (nameStart + 8 * i) | (~0u << 12), 8);
216  draw6(pixelPtr,
217  (colorPattern & 0x80) ? blinkFg : plainFg,
218  (colorPattern & 0x80) ? blinkBg : plainBg,
219  patternArea[nameArea[0] * 8]);
220  draw6(pixelPtr,
221  (colorPattern & 0x40) ? blinkFg : plainFg,
222  (colorPattern & 0x40) ? blinkBg : plainBg,
223  patternArea[nameArea[1] * 8]);
224  draw6(pixelPtr,
225  (colorPattern & 0x20) ? blinkFg : plainFg,
226  (colorPattern & 0x20) ? blinkBg : plainBg,
227  patternArea[nameArea[2] * 8]);
228  draw6(pixelPtr,
229  (colorPattern & 0x10) ? blinkFg : plainFg,
230  (colorPattern & 0x10) ? blinkBg : plainBg,
231  patternArea[nameArea[3] * 8]);
232  draw6(pixelPtr,
233  (colorPattern & 0x08) ? blinkFg : plainFg,
234  (colorPattern & 0x08) ? blinkBg : plainBg,
235  patternArea[nameArea[4] * 8]);
236  draw6(pixelPtr,
237  (colorPattern & 0x04) ? blinkFg : plainFg,
238  (colorPattern & 0x04) ? blinkBg : plainBg,
239  patternArea[nameArea[5] * 8]);
240  draw6(pixelPtr,
241  (colorPattern & 0x02) ? blinkFg : plainFg,
242  (colorPattern & 0x02) ? blinkBg : plainBg,
243  patternArea[nameArea[6] * 8]);
244  draw6(pixelPtr,
245  (colorPattern & 0x01) ? blinkFg : plainFg,
246  (colorPattern & 0x01) ? blinkBg : plainBg,
247  patternArea[nameArea[7] * 8]);
248  }
249 }
250 
251 template<typename Pixel>
252 const byte* CharacterConverter<Pixel>::getNamePtr(int line, int scroll)
253 {
254  // no need to test whether multi-page scrolling is enabled,
255  // indexMask in the nameTable already takes care of it
256  return vram.nameTable.getReadArea(
257  ((line / 8) * 32) | ((scroll & 0x20) ? 0x8000 : 0), 32);
258 }
259 template<typename Pixel>
260 void CharacterConverter<Pixel>::renderGraphic1(
261  Pixel* __restrict pixelPtr, int line)
262 {
263  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
264  patternArea += line & 7;
265  const byte* colorArea = vram.colorTable.getReadArea(0, 256 / 8);
266 
267  int scroll = vdp.getHorizontalScrollHigh();
268  const byte* namePtr = getNamePtr(line, scroll);
269  repeat(32, [&] {
270  unsigned charcode = namePtr[scroll & 0x1F];
271  unsigned pattern = patternArea[charcode * 8];
272  unsigned color = colorArea[charcode / 8];
273  Pixel fg = palFg[color >> 4];
274  Pixel bg = palFg[color & 0x0F];
275  draw8(pixelPtr, fg, bg, pattern);
276  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
277  });
278 }
279 
280 template<typename Pixel>
281 void CharacterConverter<Pixel>::renderGraphic2(
282  Pixel* __restrict pixelPtr, int line)
283 {
284  int quarter8 = (((line / 8) * 32) & ~0xFF) * 8;
285  int line7 = line & 7;
286  int scroll = vdp.getHorizontalScrollHigh();
287  const byte* namePtr = getNamePtr(line, scroll);
288 
289  if (vram.colorTable .isContinuous((8 * 256) - 1) &&
290  vram.patternTable.isContinuous((8 * 256) - 1) &&
291  ((scroll & 0x1f) == 0)) {
292  // Both color and pattern table can be accessed contiguously
293  // (no mirroring) and there's no v9958 horizontal scrolling.
294  // This is very common, so make an optimized version for this.
295  const byte* patternArea = vram.patternTable.getReadArea(quarter8, 8 * 256) + line7;
296  const byte* colorArea = vram.colorTable .getReadArea(quarter8, 8 * 256) + line7;
297  for (auto n : xrange(32)) {
298  unsigned charCode8 = namePtr[n] * 8;
299  unsigned pattern = patternArea[charCode8];
300  unsigned color = colorArea [charCode8];
301  Pixel fg = palFg[color >> 4];
302  Pixel bg = palFg[color & 0x0F];
303  draw8(pixelPtr, fg, bg, pattern);
304  }
305  } else {
306  // Slower variant, also works when:
307  // - there is mirroring in the color table
308  // - there is mirroring in the pattern table (TMS9929)
309  // - V9958 horizontal scroll feature is used
310  int baseLine = (~0u << 13) | quarter8 | line7;
311  repeat(32, [&] {
312  unsigned charCode8 = namePtr[scroll & 0x1F] * 8;
313  unsigned index = charCode8 | baseLine;
314  unsigned pattern = vram.patternTable.readNP(index);
315  unsigned color = vram.colorTable .readNP(index);
316  Pixel fg = palFg[color >> 4];
317  Pixel bg = palFg[color & 0x0F];
318  draw8(pixelPtr, fg, bg, pattern);
319  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
320  });
321  }
322 }
323 
324 template<typename Pixel>
325 void CharacterConverter<Pixel>::renderMultiHelper(
326  Pixel* __restrict pixelPtr, int line,
327  int mask, int patternQuarter)
328 {
329  unsigned baseLine = mask | ((line / 4) & 7);
330  unsigned scroll = vdp.getHorizontalScrollHigh();
331  const byte* namePtr = getNamePtr(line, scroll);
332  repeat(32, [&] {
333  unsigned patternNr = patternQuarter | namePtr[scroll & 0x1F];
334  unsigned color = vram.patternTable.readNP((patternNr * 8) | baseLine);
335  Pixel cl = palFg[color >> 4];
336  Pixel cr = palFg[color & 0x0F];
337  pixelPtr[0] = cl; pixelPtr[1] = cl;
338  pixelPtr[2] = cl; pixelPtr[3] = cl;
339  pixelPtr[4] = cr; pixelPtr[5] = cr;
340  pixelPtr[6] = cr; pixelPtr[7] = cr;
341  pixelPtr += 8;
342  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
343  });
344 }
345 template<typename Pixel>
346 void CharacterConverter<Pixel>::renderMulti(
347  Pixel* __restrict pixelPtr, int line)
348 {
349  int mask = (~0u << 11);
350  renderMultiHelper(pixelPtr, line, mask, 0);
351 }
352 
353 template<typename Pixel>
354 void CharacterConverter<Pixel>::renderMultiQ(
355  Pixel* __restrict pixelPtr, int line)
356 {
357  int mask = (~0u << 13);
358  int patternQuarter = (line * 4) & ~0xFF; // (line / 8) * 32
359  renderMultiHelper(pixelPtr, line, mask, patternQuarter);
360 }
361 
362 template<typename Pixel>
363 void CharacterConverter<Pixel>::renderBogus(
364  Pixel* __restrict pixelPtr)
365 {
366  Pixel fg = palFg[vdp.getForegroundColor()];
367  Pixel bg = palFg[vdp.getBackgroundColor()];
368  auto draw = [&](int n, Pixel col) {
369  pixelPtr = std::fill_n(pixelPtr, n, col);
370 
371  };
372  draw(8, bg);
373  repeat(40, [&] {
374  draw(4, fg);
375  draw(2, bg);
376  });
377  draw(8, bg);
378 }
379 
380 template<typename Pixel>
381 void CharacterConverter<Pixel>::renderBlank(
382  Pixel* __restrict pixelPtr)
383 {
384  // when this is in effect, the VRAM is not refreshed anymore, but that
385  // is not emulated
386  std::fill_n(pixelPtr, 256, palFg[15]);
387 }
388 
// Force template instantiation.
// The member functions above are defined in this source file, so the class
// is explicitly instantiated here for each supported Pixel type.
// Which types are instantiated depends on the HAVE_16BPP, HAVE_32BPP and
// COMPONENT_GL macros (presumably provided by build-info.hh and
// components.hh -- TODO confirm).
#if HAVE_16BPP
template class CharacterConverter<uint16_t>;
#endif
#if HAVE_32BPP || COMPONENT_GL
template class CharacterConverter<uint32_t>;
#endif
396 
397 } // namespace openmsx
CharacterConverter(VDP &vdp, const Pixel *palFg, const Pixel *palBg)
Create a new bitmap scanline converter.
void setDisplayMode(DisplayMode mode)
Select the display mode to use for scanline conversion.
void convertLine(Pixel *linePtr, int line)
Convert a line of V9938 VRAM to 512 host pixels.
Represents a VDP display mode.
Definition: DisplayMode.hh:16
constexpr byte getBase() const
Get the base display mode as an integer: M5..M1 combined.
Definition: DisplayMode.hh:115
Unified implementation of MSX Video Display Processors (VDPs).
Definition: VDP.hh:63
This file implements 3 utility functions:
Definition: Autofire.cc:9
uint32_t Pixel
constexpr nibble mask[4][13]
Definition: RP5C01.cc:34
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
Definition: xrange.hh:170
constexpr auto xrange(T e)
Definition: xrange.hh:155