openMSX
CharacterConverter.cc
Go to the documentation of this file.
1 /*
2 TODO:
3 - Clean up renderGraphic2, it is currently very hard to understand
4  with all the masks and quarters etc.
5 - Correctly implement vertical scroll in text modes.
6  Can be implemented by reordering blitting, but uses a smaller
7  wrap than GFX modes: 8 lines instead of 256 lines.
8 */
9 
10 #include "CharacterConverter.hh"
11 #include "VDP.hh"
12 #include "VDPVRAM.hh"
13 #include "build-info.hh"
14 #include "components.hh"
15 #include <cstdint>
16 
17 #ifdef __SSE2__
18 #include "emmintrin.h" // SSE2
19 #endif
20 
21 namespace openmsx {
22 
23 template <class Pixel>
25  VDP& vdp_, const Pixel* palFg_, const Pixel* palBg_)
26  : vdp(vdp_), vram(vdp.getVRAM()), palFg(palFg_), palBg(palBg_)
27 {
28  modeBase = 0; // not strictly needed, but avoids Coverity warning
29 }
30 
31 template <class Pixel>
33 {
34  modeBase = mode.getBase();
35  assert(modeBase < 0x0C);
36 }
37 
38 template <class Pixel>
40 {
41  // TODO: Support YJK on modes other than Graphic 6/7.
42  switch (modeBase) {
43  case DisplayMode::GRAPHIC1: // screen 1
44  renderGraphic1(linePtr, line);
45  break;
46  case DisplayMode::TEXT1: // screen 0, width 40
47  renderText1(linePtr, line);
48  break;
49  case DisplayMode::MULTICOLOR: // screen 3
50  renderMulti(linePtr, line);
51  break;
52  case DisplayMode::GRAPHIC2: // screen 2
53  renderGraphic2(linePtr, line);
54  break;
55  case DisplayMode::GRAPHIC3: // screen 4
56  renderGraphic2(linePtr, line); // graphic3, actually
57  break;
58  case DisplayMode::TEXT2: // screen 0, width 80
59  renderText2(linePtr, line);
60  break;
61  case DisplayMode::TEXT1Q: // TMSxxxx only
62  if (vdp.isMSX1VDP()) {
63  renderText1Q(linePtr, line);
64  } else {
65  renderBlank (linePtr);
66  }
67  break;
68  case DisplayMode::MULTIQ: // TMSxxxx only
69  if (vdp.isMSX1VDP()) {
70  renderMultiQ(linePtr, line);
71  } else {
72  renderBlank (linePtr);
73  }
74  break;
75  default: // remaining (non-bitmap) modes
76  if (vdp.isMSX1VDP()) {
77  renderBogus(linePtr);
78  } else {
79  renderBlank(linePtr);
80  }
81  }
82 }
83 
#ifdef __SSE2__
// Copied from Scale2xScaler.cc, TODO move to common location?
//
// Bitwise select: for every bit position the result takes the bit from 'a1'
// where 'mask' is 1 and from 'a0' where 'mask' is 0.
// Computed as ((a0 ^ a1) & mask) ^ a0.
static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
    return _mm_xor_si128(_mm_and_si128(_mm_xor_si128(a0, a1), mask), a0);
}
#endif
91 
// Draw a 6 pixel wide character row.
// Bits 7..2 of 'pattern' are used, most significant bit first: a set bit
// gives a 'fg' pixel, a clear bit a 'bg' pixel. Bits 1..0 are ignored.
// 'pixelPtr' is advanced by 6 pixels.
template<typename Pixel> static inline void draw6(
    Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
{
    pixelPtr[0] = (pattern & 0x80) ? fg : bg;
    pixelPtr[1] = (pattern & 0x40) ? fg : bg;
    pixelPtr[2] = (pattern & 0x20) ? fg : bg;
    pixelPtr[3] = (pattern & 0x10) ? fg : bg;
    pixelPtr[4] = (pattern & 0x08) ? fg : bg;
    pixelPtr[5] = (pattern & 0x04) ? fg : bg;
    pixelPtr += 6;
}
103 
104 template<typename Pixel> static inline void draw8(
105  Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern,
106  bool misAligned, uint32_t& partial)
107 {
108 #ifdef __arm__
109  // ARM version, 16bpp, (32-bit aligned/unaligned destination)
110  if (sizeof(Pixel) == 2) {
111  if (misAligned) {
112  asm volatile (
113  "mov r0,%[PART]\n\t"
114  "tst %[PAT],#128\n\t"
115  "ite eq\n\t"
116  "orreq r0,r0,%[BG], lsl #16\n\t"
117  "orrne r0,r0,%[FG], lsl #16\n\t"
118  "tst %[PAT],#64\n\t"
119  "ite eq\n\t"
120  "moveq r1,%[BG]\n\t"
121  "movne r1,%[FG]\n\t"
122  "tst %[PAT],#32\n\t"
123  "ite eq\n\t"
124  "orreq r1,r1,%[BG], lsl #16\n\t"
125  "orrne r1,r1,%[FG], lsl #16\n\t"
126  "tst %[PAT],#16\n\t"
127  "ite eq\n\t"
128  "moveq r2,%[BG]\n\t"
129  "movne r2,%[FG]\n\t"
130  "tst %[PAT],#8\n\t"
131  "ite eq\n\t"
132  "orreq r2,r2,%[BG], lsl #16\n\t"
133  "orrne r2,r2,%[FG], lsl #16\n\t"
134  "tst %[PAT],#4\n\t"
135  "ite eq\n\t"
136  "moveq r3,%[BG]\n\t"
137  "movne r3,%[FG]\n\t"
138  "tst %[PAT],#2\n\t"
139  "ite eq\n\t"
140  "orreq r3,r3,%[BG], lsl #16\n\t"
141  "orrne r3,r3,%[FG], lsl #16\n\t"
142  "tst %[PAT],#1\n\t"
143  "ite eq\n\t"
144  "moveq %[PART],%[BG]\n\t"
145  "movne %[PART],%[FG]\n\t"
146  "stmia %[OUT]!,{r0-r3}\n\t"
147  : [OUT] "=r" (pixelPtr)
148  , [PART] "=r" (partial)
149  : "[OUT]" (pixelPtr)
150  , "[PART]" (partial)
151  , [PAT] "r" (pattern)
152  , [FG] "r" (uint32_t(fg))
153  , [BG] "r" (uint32_t(bg))
154  : "r0","r1","r2","r3","memory"
155  );
156  } else {
157  asm volatile (
158  "tst %[PAT],#128\n\t"
159  "ite eq\n\t"
160  "moveq r0,%[BG]\n\t"
161  "movne r0,%[FG]\n\t"
162  "tst %[PAT],#64\n\t"
163  "ite eq\n\t"
164  "orreq r0,r0,%[BG], lsl #16\n\t"
165  "orrne r0,r0,%[FG], lsl #16\n\t"
166  "tst %[PAT],#32\n\t"
167  "ite eq\n\t"
168  "moveq r1,%[BG]\n\t"
169  "movne r1,%[FG]\n\t"
170  "tst %[PAT],#16\n\t"
171  "ite eq\n\t"
172  "orreq r1,r1,%[BG], lsl #16\n\t"
173  "orrne r1,r1,%[FG], lsl #16\n\t"
174  "tst %[PAT],#8\n\t"
175  "ite eq\n\t"
176  "moveq r2,%[BG]\n\t"
177  "movne r2,%[FG]\n\t"
178  "tst %[PAT],#4\n\t"
179  "ite eq\n\t"
180  "orreq r2,r2,%[BG], lsl #16\n\t"
181  "orrne r2,r2,%[FG], lsl #16\n\t"
182  "tst %[PAT],#2\n\t"
183  "ite eq\n\t"
184  "moveq r3,%[BG]\n\t"
185  "movne r3,%[FG]\n\t"
186  "tst %[PAT],#1\n\t"
187  "ite eq\n\t"
188  "orreq r3,r3,%[BG], lsl #16\n\t"
189  "orrne r3,r3,%[FG], lsl #16\n\t"
190  "stmia %[OUT]!,{r0-r3}\n\t"
191 
192  : [OUT] "=r" (pixelPtr)
193  : "[OUT]" (pixelPtr)
194  , [PAT] "r" (pattern)
195  , [FG] "r" (uint32_t(fg))
196  , [BG] "r" (uint32_t(bg))
197  : "r0","r1","r2","r3","memory"
198  );
199  }
200  return;
201  }
202 #endif
203  (void)misAligned; (void)partial;
204 
205 #ifdef __SSE2__
206  // SSE2 version, 32bpp (16bpp is possible, but not worth it anymore)
207  if (sizeof(Pixel) == 4) {
208  const __m128i m74 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
209  const __m128i m30 = _mm_set_epi32(0x01, 0x02, 0x04, 0x08);
210  const __m128i zero = _mm_setzero_si128();
211 
212  __m128i fg4 = _mm_set1_epi32(fg);
213  __m128i bg4 = _mm_set1_epi32(bg);
214  __m128i pat = _mm_set1_epi32(pattern);
215 
216  __m128i b74 = _mm_cmpeq_epi32(_mm_and_si128(pat, m74), zero);
217  __m128i b30 = _mm_cmpeq_epi32(_mm_and_si128(pat, m30), zero);
218 
219  __m128i* out = reinterpret_cast<__m128i*>(pixelPtr);
220  _mm_storeu_si128(out + 0, select(fg4, bg4, b74));
221  _mm_storeu_si128(out + 1, select(fg4, bg4, b30));
222  pixelPtr += 8;
223  return;
224  }
225 #endif
226 
227  // C++ version
228  pixelPtr[0] = (pattern & 0x80) ? fg : bg;
229  pixelPtr[1] = (pattern & 0x40) ? fg : bg;
230  pixelPtr[2] = (pattern & 0x20) ? fg : bg;
231  pixelPtr[3] = (pattern & 0x10) ? fg : bg;
232  pixelPtr[4] = (pattern & 0x08) ? fg : bg;
233  pixelPtr[5] = (pattern & 0x04) ? fg : bg;
234  pixelPtr[6] = (pattern & 0x02) ? fg : bg;
235  pixelPtr[7] = (pattern & 0x01) ? fg : bg;
236  pixelPtr += 8;
237 }
238 
239 template <class Pixel>
241  Pixel* __restrict pixelPtr, int line)
242 {
243  Pixel fg = palFg[vdp.getForegroundColor()];
244  Pixel bg = palFg[vdp.getBackgroundColor()];
245 
246  // 8 * 256 is small enough to always be contiguous
247  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
248  patternArea += (line + vdp.getVerticalScroll()) & 7;
249 
250  // Note: Because line width is not a power of two, reading an entire line
251  // from a VRAM pointer returned by readArea will not wrap the index
252  // correctly. Therefore we read one character at a time.
253  unsigned nameStart = (line / 8) * 40;
254  unsigned nameEnd = nameStart + 40;
255  for (unsigned name = nameStart; name < nameEnd; ++name) {
256  unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
257  unsigned pattern = patternArea[charcode * 8];
258  draw6(pixelPtr, fg, bg, pattern);
259  }
260 }
261 
262 template <class Pixel>
264  Pixel* __restrict pixelPtr, int line)
265 {
266  Pixel fg = palFg[vdp.getForegroundColor()];
267  Pixel bg = palFg[vdp.getBackgroundColor()];
268 
269  unsigned patternBaseLine = (~0u << 13) | ((line + vdp.getVerticalScroll()) & 7);
270 
271  // Note: Because line width is not a power of two, reading an entire line
272  // from a VRAM pointer returned by readArea will not wrap the index
273  // correctly. Therefore we read one character at a time.
274  unsigned nameStart = (line / 8) * 40;
275  unsigned nameEnd = nameStart + 40;
276  unsigned patternQuarter = (line & 0xC0) << 2;
277  for (unsigned name = nameStart; name < nameEnd; ++name) {
278  unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
279  unsigned patternNr = patternQuarter | charcode;
280  unsigned pattern = vram.patternTable.readNP(
281  patternBaseLine | (patternNr * 8));
282  draw6(pixelPtr, fg, bg, pattern);
283  }
284 }
285 
286 template <class Pixel>
288  Pixel* __restrict pixelPtr, int line)
289 {
290  Pixel plainFg = palFg[vdp.getForegroundColor()];
291  Pixel plainBg = palFg[vdp.getBackgroundColor()];
292  Pixel blinkFg, blinkBg;
293  if (vdp.getBlinkState()) {
294  int fg = vdp.getBlinkForegroundColor();
295  blinkFg = palBg[fg ? fg : vdp.getBlinkBackgroundColor()];
296  blinkBg = palBg[vdp.getBlinkBackgroundColor()];
297  } else {
298  blinkFg = plainFg;
299  blinkBg = plainBg;
300  }
301 
302  // 8 * 256 is small enough to always be contiguous
303  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
304  patternArea += (line + vdp.getVerticalScroll()) & 7;
305 
306  unsigned colorStart = (line / 8) * (80 / 8);
307  unsigned nameStart = (line / 8) * 80;
308  for (unsigned i = 0; i < (80 / 8); ++i) {
309  unsigned colorPattern = vram.colorTable.readNP(
310  (colorStart + i) | (~0u << 9));
311  const byte* nameArea = vram.nameTable.getReadArea(
312  (nameStart + 8 * i) | (~0u << 12), 8);
313  draw6(pixelPtr,
314  (colorPattern & 0x80) ? blinkFg : plainFg,
315  (colorPattern & 0x80) ? blinkBg : plainBg,
316  patternArea[nameArea[0] * 8]);
317  draw6(pixelPtr,
318  (colorPattern & 0x40) ? blinkFg : plainFg,
319  (colorPattern & 0x40) ? blinkBg : plainBg,
320  patternArea[nameArea[1] * 8]);
321  draw6(pixelPtr,
322  (colorPattern & 0x20) ? blinkFg : plainFg,
323  (colorPattern & 0x20) ? blinkBg : plainBg,
324  patternArea[nameArea[2] * 8]);
325  draw6(pixelPtr,
326  (colorPattern & 0x10) ? blinkFg : plainFg,
327  (colorPattern & 0x10) ? blinkBg : plainBg,
328  patternArea[nameArea[3] * 8]);
329  draw6(pixelPtr,
330  (colorPattern & 0x08) ? blinkFg : plainFg,
331  (colorPattern & 0x08) ? blinkBg : plainBg,
332  patternArea[nameArea[4] * 8]);
333  draw6(pixelPtr,
334  (colorPattern & 0x04) ? blinkFg : plainFg,
335  (colorPattern & 0x04) ? blinkBg : plainBg,
336  patternArea[nameArea[5] * 8]);
337  draw6(pixelPtr,
338  (colorPattern & 0x02) ? blinkFg : plainFg,
339  (colorPattern & 0x02) ? blinkBg : plainBg,
340  patternArea[nameArea[6] * 8]);
341  draw6(pixelPtr,
342  (colorPattern & 0x01) ? blinkFg : plainFg,
343  (colorPattern & 0x01) ? blinkBg : plainBg,
344  patternArea[nameArea[7] * 8]);
345  }
346 }
347 
348 template <class Pixel>
349 const byte* CharacterConverter<Pixel>::getNamePtr(int line, int scroll)
350 {
351  // no need to test whether multi-page scrolling is enabled,
352  // indexMask in the nameTable already takes care of it
353  return vram.nameTable.getReadArea(
354  ((line / 8) * 32) | ((scroll & 0x20) ? 0x8000 : 0), 32);
355 }
356 template <class Pixel>
358  Pixel* __restrict pixelPtr, int line)
359 {
360  bool misAligned = false; // initialize with dummy
361  uint32_t partial = 0; // values to avoid warning
362 #ifdef __arm__
363  misAligned = sizeof(Pixel) == 2 && (reinterpret_cast<uintptr_t>(pixelPtr) & 3);
364  if (misAligned) pixelPtr--;
365  partial = *pixelPtr;
366 #endif
367 
368  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
369  patternArea += line & 7;
370  const byte* colorArea = vram.colorTable.getReadArea(0, 256 / 8);
371 
372  int scroll = vdp.getHorizontalScrollHigh();
373  const byte* namePtr = getNamePtr(line, scroll);
374  for (unsigned n = 0; n < 32; ++n) {
375  unsigned charcode = namePtr[scroll & 0x1F];
376  unsigned pattern = patternArea[charcode * 8];
377  unsigned color = colorArea[charcode / 8];
378  Pixel fg = palFg[color >> 4];
379  Pixel bg = palFg[color & 0x0F];
380  draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
381  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
382  }
383 
384 #ifdef __arm__
385  if (misAligned) *pixelPtr = static_cast<Pixel>(partial);
386 #endif
387 }
388 
389 template <class Pixel>
391  Pixel* __restrict pixelPtr, int line)
392 {
393  bool misAligned = false; // initialize with dummy
394  uint32_t partial = 0; // values to avoid warning
395 #ifdef __arm__
396  misAligned = sizeof(Pixel) == 2 && (reinterpret_cast<uintptr_t>(pixelPtr) & 3);
397  if (misAligned) pixelPtr--;
398  partial = *pixelPtr;
399 #endif
400 
401  int quarter8 = (((line / 8) * 32) & ~0xFF) * 8;
402  int line7 = line & 7;
403  int scroll = vdp.getHorizontalScrollHigh();
404  const byte* namePtr = getNamePtr(line, scroll);
405 
406  if (vram.colorTable .isContinuous((8 * 256) - 1) &&
407  vram.patternTable.isContinuous((8 * 256) - 1) &&
408  ((scroll & 0x1f) == 0)) {
409  // Both color and pattern table can be accessed contiguously
410  // (no mirroring) and there's no v9958 horizontal scrolling.
411  // This is very common, so make an optimized version for this.
412  const byte* patternArea = vram.patternTable.getReadArea(quarter8, 8 * 256) + line7;
413  const byte* colorArea = vram.colorTable .getReadArea(quarter8, 8 * 256) + line7;
414  for (unsigned n = 0; n < 32; ++n) {
415  unsigned charCode8 = namePtr[n] * 8;
416  unsigned pattern = patternArea[charCode8];
417  unsigned color = colorArea [charCode8];
418  Pixel fg = palFg[color >> 4];
419  Pixel bg = palFg[color & 0x0F];
420  draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
421  }
422  } else {
423  // Slower variant, also works when:
424  // - there is mirroring in the color table
425  // - there is mirroring in the pattern table (TMS9929)
426  // - V9958 horizontal scroll feature is used
427  int baseLine = (~0u << 13) | quarter8 | line7;
428  for (unsigned n = 0; n < 32; ++n) {
429  unsigned charCode8 = namePtr[scroll & 0x1F] * 8;
430  unsigned index = charCode8 | baseLine;
431  unsigned pattern = vram.patternTable.readNP(index);
432  unsigned color = vram.colorTable .readNP(index);
433  Pixel fg = palFg[color >> 4];
434  Pixel bg = palFg[color & 0x0F];
435  draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
436  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
437  }
438  }
439 
440 #ifdef __arm__
441  if (misAligned) *pixelPtr = static_cast<Pixel>(partial);
442 #endif
443 }
444 
445 template <class Pixel>
447  Pixel* __restrict pixelPtr, int line,
448  int mask, int patternQuarter)
449 {
450  unsigned baseLine = mask | ((line / 4) & 7);
451  unsigned scroll = vdp.getHorizontalScrollHigh();
452  const byte* namePtr = getNamePtr(line, scroll);
453  for (unsigned n = 0; n < 32; ++n) {
454  unsigned patternNr = patternQuarter | namePtr[scroll & 0x1F];
455  unsigned color = vram.patternTable.readNP((patternNr * 8) | baseLine);
456  Pixel cl = palFg[color >> 4];
457  Pixel cr = palFg[color & 0x0F];
458  pixelPtr[0] = cl; pixelPtr[1] = cl;
459  pixelPtr[2] = cl; pixelPtr[3] = cl;
460  pixelPtr[4] = cr; pixelPtr[5] = cr;
461  pixelPtr[6] = cr; pixelPtr[7] = cr;
462  pixelPtr += 8;
463  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
464  }
465 }
466 template <class Pixel>
468  Pixel* __restrict pixelPtr, int line)
469 {
470  int mask = (~0u << 11);
471  renderMultiHelper(pixelPtr, line, mask, 0);
472 }
473 
474 template <class Pixel>
476  Pixel* __restrict pixelPtr, int line)
477 {
478  int mask = (~0u << 13);
479  int patternQuarter = (line * 4) & ~0xFF; // (line / 8) * 32
480  renderMultiHelper(pixelPtr, line, mask, patternQuarter);
481 }
482 
483 template <class Pixel>
485  Pixel* __restrict pixelPtr)
486 {
487  Pixel fg = palFg[vdp.getForegroundColor()];
488  Pixel bg = palFg[vdp.getBackgroundColor()];
489  for (int n = 8; n--; ) *pixelPtr++ = bg;
490  for (int c = 40; c--; ) {
491  for (int n = 4; n--; ) *pixelPtr++ = fg;
492  for (int n = 2; n--; ) *pixelPtr++ = bg;
493  }
494  for (int n = 8; n--; ) *pixelPtr++ = bg;
495 }
496 
497 template <class Pixel>
499  Pixel* __restrict pixelPtr)
500 {
501  // when this is in effect, the VRAM is not refreshed anymore, but that
502  // is not emulated
503  for (int n = 256; n--; ) *pixelPtr++ = palFg[15];
504 }
505 
// Force template instantiation for the pixel formats enabled in this build.
#if HAVE_16BPP
template class CharacterConverter<uint16_t>;
#endif
#if HAVE_32BPP || COMPONENT_GL
template class CharacterConverter<uint32_t>;
#endif
513 
514 } // namespace openmsx
int getBlinkBackgroundColor() const
Gets the current blinking color for blinking text.
Definition: VDP.hh:212
bool isContinuous(unsigned index, unsigned size) const
Is the given index range continuous in VRAM (in other words, there's no mirroring)? Only if the range is continuou...
Definition: VDPVRAM.hh:190
int getBlinkForegroundColor() const
Gets the current blinking color for blinking text.
Definition: VDP.hh:205
byte readNP(unsigned index) const
Reads a byte from VRAM in its current state.
Definition: VDPVRAM.hh:253
VRAMWindow patternTable
Definition: VDPVRAM.hh:659
Represents a VDP display mode.
Definition: DisplayMode.hh:14
byte getVerticalScroll() const
Gets the current vertical scroll (line displayed at Y=0).
Definition: VDP.hh:269
VRAMWindow nameTable
Definition: VDPVRAM.hh:657
byte getHorizontalScrollHigh() const
Gets the current horizontal scroll higher bits.
Definition: VDP.hh:287
uint32_t Pixel
bool getBlinkState() const
Gets the current blink state.
Definition: VDP.hh:219
const byte * getReadArea(unsigned index, unsigned size) const
Gets a pointer to a contiguous part of the VRAM.
Definition: VDPVRAM.hh:219
VRAMWindow colorTable
Definition: VDPVRAM.hh:658
int getForegroundColor() const
Gets the current foreground color.
Definition: VDP.hh:181
CharacterConverter(VDP &vdp, const Pixel *palFg, const Pixel *palBg)
Create a new bitmap scanline converter.
bool isMSX1VDP() const
Is this an MSX1 VDP?
Definition: VDP.hh:92
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
unsigned char byte
8 bit unsigned integer
Definition: openmsx.hh:25
void convertLine(Pixel *linePtr, int line)
Convert a line of V9938 VRAM to 512 host pixels.
Unified implementation of MSX Video Display Processors (VDPs).
Definition: VDP.hh:60
byte getBase() const
Get the base display mode as an integer: M5..M1 combined.
Definition: DisplayMode.hh:123
Utility class for converting VRAM contents to host pixels.
void setDisplayMode(DisplayMode mode)
Select the display mode to use for scanline conversion.
int getBackgroundColor() const
Gets the current background color.
Definition: VDP.hh:193