openMSX
CharacterConverter.cc
Go to the documentation of this file.
/*
TODO:
- Clean up renderGraphic2, it is currently very hard to understand
  with all the masks and quarters etc.
- Correctly implement vertical scroll in text modes.
  Can be implemented by reordering blitting, but uses a smaller
  wrap than GFX modes: 8 lines instead of 256 lines.
*/
9
10#include "CharacterConverter.hh"
11#include "VDP.hh"
12#include "VDPVRAM.hh"
13#include "ranges.hh"
14#include "xrange.hh"
15#include "build-info.hh"
16#include "components.hh"
17#include <cstdint>
18
19#ifdef __SSE2__
20#include "emmintrin.h" // SSE2
21#endif
22
23namespace openmsx {
24
25template<std::unsigned_integral Pixel>
27 VDP& vdp_, std::span<const Pixel, 16> palFg_, std::span<const Pixel, 16> palBg_)
28 : vdp(vdp_), vram(vdp.getVRAM()), palFg(palFg_), palBg(palBg_)
29{
30}
31
32template<std::unsigned_integral Pixel>
34{
35 modeBase = mode.getBase();
36 assert(modeBase < 0x0C);
37}
38
39template<std::unsigned_integral Pixel>
40void CharacterConverter<Pixel>::convertLine(std::span<Pixel> buf, int line)
41{
42 // TODO: Support YJK on modes other than Graphic 6/7.
43 switch (modeBase) {
44 case DisplayMode::GRAPHIC1: // screen 1
45 renderGraphic1(subspan<256>(buf), line);
46 break;
47 case DisplayMode::TEXT1: // screen 0, width 40
48 renderText1(subspan<256>(buf), line);
49 break;
50 case DisplayMode::MULTICOLOR: // screen 3
51 renderMulti(subspan<256>(buf), line);
52 break;
53 case DisplayMode::GRAPHIC2: // screen 2
54 renderGraphic2(subspan<256>(buf), line);
55 break;
56 case DisplayMode::GRAPHIC3: // screen 4
57 renderGraphic2(subspan<256>(buf), line); // graphic3, actually
58 break;
59 case DisplayMode::TEXT2: // screen 0, width 80
60 renderText2(subspan<512>(buf), line);
61 break;
62 case DisplayMode::TEXT1Q: // TMSxxxx only
63 if (vdp.isMSX1VDP()) {
64 renderText1Q(subspan<256>(buf), line);
65 } else {
66 renderBlank (subspan<256>(buf));
67 }
68 break;
69 case DisplayMode::MULTIQ: // TMSxxxx only
70 if (vdp.isMSX1VDP()) {
71 renderMultiQ(subspan<256>(buf), line);
72 } else {
73 renderBlank (subspan<256>(buf));
74 }
75 break;
76 default: // remaining (non-bitmap) modes
77 if (vdp.isMSX1VDP()) {
78 renderBogus(subspan<256>(buf));
79 } else {
80 renderBlank(subspan<256>(buf));
81 }
82 }
83}
84
#ifdef __SSE2__
// Copied from Scale2xScaler.cc, TODO move to common location?
// Bitwise blend: every result bit is taken from 'a1' where the matching bit
// in 'mask' is set and from 'a0' where it is clear.
static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
	const __m128i fromA1 = _mm_and_si128(mask, a1);
	const __m128i fromA0 = _mm_andnot_si128(mask, a0); // (~mask) & a0
	return _mm_or_si128(fromA1, fromA0);
}
#endif
92
93template<std::unsigned_integral Pixel> static inline void draw6(
94 Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
95{
96 pixelPtr[0] = (pattern & 0x80) ? fg : bg;
97 pixelPtr[1] = (pattern & 0x40) ? fg : bg;
98 pixelPtr[2] = (pattern & 0x20) ? fg : bg;
99 pixelPtr[3] = (pattern & 0x10) ? fg : bg;
100 pixelPtr[4] = (pattern & 0x08) ? fg : bg;
101 pixelPtr[5] = (pattern & 0x04) ? fg : bg;
102 pixelPtr += 6;
103}
104
105template<std::unsigned_integral Pixel> static inline void draw8(
106 Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
107{
108#ifdef __SSE2__
109 // SSE2 version, 32bpp (16bpp is possible, but not worth it anymore)
110 if constexpr (sizeof(Pixel) == 4) {
111 const __m128i m74 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
112 const __m128i m30 = _mm_set_epi32(0x01, 0x02, 0x04, 0x08);
113 const __m128i zero = _mm_setzero_si128();
114
115 __m128i fg4 = _mm_set1_epi32(fg);
116 __m128i bg4 = _mm_set1_epi32(bg);
117 __m128i pat = _mm_set1_epi32(pattern);
118
119 __m128i b74 = _mm_cmpeq_epi32(_mm_and_si128(pat, m74), zero);
120 __m128i b30 = _mm_cmpeq_epi32(_mm_and_si128(pat, m30), zero);
121
122 auto* out = reinterpret_cast<__m128i*>(pixelPtr);
123 _mm_storeu_si128(out + 0, select(fg4, bg4, b74));
124 _mm_storeu_si128(out + 1, select(fg4, bg4, b30));
125 pixelPtr += 8;
126 return;
127 }
128#endif
129
130 // C++ version
131 pixelPtr[0] = (pattern & 0x80) ? fg : bg;
132 pixelPtr[1] = (pattern & 0x40) ? fg : bg;
133 pixelPtr[2] = (pattern & 0x20) ? fg : bg;
134 pixelPtr[3] = (pattern & 0x10) ? fg : bg;
135 pixelPtr[4] = (pattern & 0x08) ? fg : bg;
136 pixelPtr[5] = (pattern & 0x04) ? fg : bg;
137 pixelPtr[6] = (pattern & 0x02) ? fg : bg;
138 pixelPtr[7] = (pattern & 0x01) ? fg : bg;
139 pixelPtr += 8;
140}
141
142template<std::unsigned_integral Pixel>
143void CharacterConverter<Pixel>::renderText1(std::span<Pixel, 256> buf, int line)
144{
145 Pixel fg = palFg[vdp.getForegroundColor()];
146 Pixel bg = palFg[vdp.getBackgroundColor()];
147
148 // 8 * 256 is small enough to always be contiguous
149 auto patternArea = vram.patternTable.getReadArea<256 * 8>(0);
150 auto l = (line + vdp.getVerticalScroll()) & 7;
151
152 // Note: Because line width is not a power of two, reading an entire line
153 // from a VRAM pointer returned by readArea will not wrap the index
154 // correctly. Therefore we read one character at a time.
155 unsigned nameStart = (line / 8) * 40;
156 unsigned nameEnd = nameStart + 40;
157 Pixel* __restrict pixelPtr = buf.data();
158 for (auto name : xrange(nameStart, nameEnd)) {
159 unsigned charCode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
160 auto pattern = patternArea[l + charCode * 8];
161 draw6(pixelPtr, fg, bg, pattern);
162 }
163}
164
165template<std::unsigned_integral Pixel>
166void CharacterConverter<Pixel>::renderText1Q(std::span<Pixel, 256> buf, int line)
167{
168 Pixel fg = palFg[vdp.getForegroundColor()];
169 Pixel bg = palFg[vdp.getBackgroundColor()];
170
171 unsigned patternBaseLine = (~0u << 13) | ((line + vdp.getVerticalScroll()) & 7);
172
173 // Note: Because line width is not a power of two, reading an entire line
174 // from a VRAM pointer returned by readArea will not wrap the index
175 // correctly. Therefore we read one character at a time.
176 unsigned nameStart = (line / 8) * 40;
177 unsigned nameEnd = nameStart + 40;
178 unsigned patternQuarter = (line & 0xC0) << 2;
179 Pixel* __restrict pixelPtr = buf.data();
180 for (auto name : xrange(nameStart, nameEnd)) {
181 unsigned charCode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
182 unsigned patternNr = patternQuarter | charCode;
183 auto pattern = vram.patternTable.readNP(
184 patternBaseLine | (patternNr * 8));
185 draw6(pixelPtr, fg, bg, pattern);
186 }
187}
188
189template<std::unsigned_integral Pixel>
190void CharacterConverter<Pixel>::renderText2(std::span<Pixel, 512> buf, int line)
191{
192 Pixel plainFg = palFg[vdp.getForegroundColor()];
193 Pixel plainBg = palFg[vdp.getBackgroundColor()];
194 Pixel blinkFg, blinkBg;
195 if (vdp.getBlinkState()) {
196 int fg = vdp.getBlinkForegroundColor();
197 blinkFg = palBg[fg ? fg : vdp.getBlinkBackgroundColor()];
198 blinkBg = palBg[vdp.getBlinkBackgroundColor()];
199 } else {
200 blinkFg = plainFg;
201 blinkBg = plainBg;
202 }
203
204 // 8 * 256 is small enough to always be contiguous
205 auto patternArea = vram.patternTable.getReadArea<256 * 8>(0);
206 auto l = (line + vdp.getVerticalScroll()) & 7;
207
208 unsigned colorStart = (line / 8) * (80 / 8);
209 unsigned nameStart = (line / 8) * 80;
210 Pixel* __restrict pixelPtr = buf.data();
211 for (auto i : xrange(80 / 8)) {
212 unsigned colorPattern = vram.colorTable.readNP(
213 (colorStart + i) | (~0u << 9));
214 auto nameArea = vram.nameTable.getReadArea<8>(
215 (nameStart + 8 * i) | (~0u << 12));
216 draw6(pixelPtr,
217 (colorPattern & 0x80) ? blinkFg : plainFg,
218 (colorPattern & 0x80) ? blinkBg : plainBg,
219 patternArea[l + nameArea[0] * 8]);
220 draw6(pixelPtr,
221 (colorPattern & 0x40) ? blinkFg : plainFg,
222 (colorPattern & 0x40) ? blinkBg : plainBg,
223 patternArea[l + nameArea[1] * 8]);
224 draw6(pixelPtr,
225 (colorPattern & 0x20) ? blinkFg : plainFg,
226 (colorPattern & 0x20) ? blinkBg : plainBg,
227 patternArea[l + nameArea[2] * 8]);
228 draw6(pixelPtr,
229 (colorPattern & 0x10) ? blinkFg : plainFg,
230 (colorPattern & 0x10) ? blinkBg : plainBg,
231 patternArea[l + nameArea[3] * 8]);
232 draw6(pixelPtr,
233 (colorPattern & 0x08) ? blinkFg : plainFg,
234 (colorPattern & 0x08) ? blinkBg : plainBg,
235 patternArea[l + nameArea[4] * 8]);
236 draw6(pixelPtr,
237 (colorPattern & 0x04) ? blinkFg : plainFg,
238 (colorPattern & 0x04) ? blinkBg : plainBg,
239 patternArea[l + nameArea[5] * 8]);
240 draw6(pixelPtr,
241 (colorPattern & 0x02) ? blinkFg : plainFg,
242 (colorPattern & 0x02) ? blinkBg : plainBg,
243 patternArea[l + nameArea[6] * 8]);
244 draw6(pixelPtr,
245 (colorPattern & 0x01) ? blinkFg : plainFg,
246 (colorPattern & 0x01) ? blinkBg : plainBg,
247 patternArea[l + nameArea[7] * 8]);
248 }
249}
250
251template<std::unsigned_integral Pixel>
252std::span<const byte, 32> CharacterConverter<Pixel>::getNamePtr(int line, int scroll)
253{
254 // no need to test whether multi-page scrolling is enabled,
255 // indexMask in the nameTable already takes care of it
256 return vram.nameTable.getReadArea<32>(
257 ((line / 8) * 32) | ((scroll & 0x20) ? 0x8000 : 0));
258}
259template<std::unsigned_integral Pixel>
260void CharacterConverter<Pixel>::renderGraphic1(std::span<Pixel, 256> buf, int line)
261{
262 auto patternArea = vram.patternTable.getReadArea<256 * 8>(0);
263 auto l = line & 7;
264 auto colorArea = vram.colorTable.getReadArea<256 / 8>(0);
265
266 int scroll = vdp.getHorizontalScrollHigh();
267 auto namePtr = getNamePtr(line, scroll);
268 Pixel* __restrict pixelPtr = buf.data();
269 repeat(32, [&] {
270 auto charCode = namePtr[scroll & 0x1F];
271 auto pattern = patternArea[l + charCode * 8];
272 auto color = colorArea[charCode / 8];
273 Pixel fg = palFg[color >> 4];
274 Pixel bg = palFg[color & 0x0F];
275 draw8(pixelPtr, fg, bg, pattern);
276 if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
277 });
278}
279
280template<std::unsigned_integral Pixel>
281void CharacterConverter<Pixel>::renderGraphic2(std::span<Pixel, 256> buf, int line)
282{
283 int quarter8 = (((line / 8) * 32) & ~0xFF) * 8;
284 int line7 = line & 7;
285 int scroll = vdp.getHorizontalScrollHigh();
286 auto namePtr = getNamePtr(line, scroll);
287
288 Pixel* __restrict pixelPtr = buf.data();
289 if (vram.colorTable .isContinuous((8 * 256) - 1) &&
290 vram.patternTable.isContinuous((8 * 256) - 1) &&
291 ((scroll & 0x1f) == 0)) {
292 // Both color and pattern table can be accessed contiguously
293 // (no mirroring) and there's no v9958 horizontal scrolling.
294 // This is very common, so make an optimized version for this.
295 auto patternArea = vram.patternTable.getReadArea<256 * 8>(quarter8);
296 auto colorArea = vram.colorTable .getReadArea<256 * 8>(quarter8);
297 for (auto n : xrange(32)) {
298 auto charCode8 = namePtr[n] * 8;
299 auto pattern = patternArea[line7 + charCode8];
300 auto color = colorArea [line7 + charCode8];
301 Pixel fg = palFg[color >> 4];
302 Pixel bg = palFg[color & 0x0F];
303 draw8(pixelPtr, fg, bg, pattern);
304 }
305 } else {
306 // Slower variant, also works when:
307 // - there is mirroring in the color table
308 // - there is mirroring in the pattern table (TMS9929)
309 // - V9958 horizontal scroll feature is used
310 unsigned baseLine = (~0u << 13) | quarter8 | line7;
311 repeat(32, [&] {
312 unsigned charCode8 = namePtr[scroll & 0x1F] * 8;
313 unsigned index = charCode8 | baseLine;
314 auto pattern = vram.patternTable.readNP(index);
315 auto color = vram.colorTable .readNP(index);
316 Pixel fg = palFg[color >> 4];
317 Pixel bg = palFg[color & 0x0F];
318 draw8(pixelPtr, fg, bg, pattern);
319 if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
320 });
321 }
322}
323
324template<std::unsigned_integral Pixel>
325void CharacterConverter<Pixel>::renderMultiHelper(
326 Pixel* __restrict pixelPtr, int line,
327 unsigned mask, unsigned patternQuarter)
328{
329 unsigned baseLine = mask | ((line / 4) & 7);
330 unsigned scroll = vdp.getHorizontalScrollHigh();
331 auto namePtr = getNamePtr(line, scroll);
332 repeat(32, [&] {
333 unsigned patternNr = patternQuarter | namePtr[scroll & 0x1F];
334 unsigned color = vram.patternTable.readNP((patternNr * 8) | baseLine);
335 Pixel cl = palFg[color >> 4];
336 Pixel cr = palFg[color & 0x0F];
337 pixelPtr[0] = cl; pixelPtr[1] = cl;
338 pixelPtr[2] = cl; pixelPtr[3] = cl;
339 pixelPtr[4] = cr; pixelPtr[5] = cr;
340 pixelPtr[6] = cr; pixelPtr[7] = cr;
341 pixelPtr += 8;
342 if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
343 });
344}
345template<std::unsigned_integral Pixel>
346void CharacterConverter<Pixel>::renderMulti(std::span<Pixel, 256> buf, int line)
347{
348 unsigned mask = (~0u << 11);
349 renderMultiHelper(buf.data(), line, mask, 0);
350}
351
352template<std::unsigned_integral Pixel>
353void CharacterConverter<Pixel>::renderMultiQ(
354 std::span<Pixel, 256> buf, int line)
355{
356 unsigned mask = (~0u << 13);
357 unsigned patternQuarter = (line * 4) & ~0xFF; // (line / 8) * 32
358 renderMultiHelper(buf.data(), line, mask, patternQuarter);
359}
360
361template<std::unsigned_integral Pixel>
362void CharacterConverter<Pixel>::renderBogus(std::span<Pixel, 256> buf)
363{
364 Pixel* __restrict pixelPtr = buf.data();
365 Pixel fg = palFg[vdp.getForegroundColor()];
366 Pixel bg = palFg[vdp.getBackgroundColor()];
367 auto draw = [&](int n, Pixel col) {
368 pixelPtr = std::fill_n(pixelPtr, n, col);
369 };
370 draw(8, bg);
371 repeat(40, [&] {
372 draw(4, fg);
373 draw(2, bg);
374 });
375 draw(8, bg);
376}
377
378template<std::unsigned_integral Pixel>
379void CharacterConverter<Pixel>::renderBlank(std::span<Pixel, 256> buf)
380{
381 // when this is in effect, the VRAM is not refreshed anymore, but that
382 // is not emulated
383 ranges::fill(buf, palFg[15]);
384}
385
386// Force template instantiation.
// The member functions above are defined in this source file, so the Pixel
// types that are actually used are instantiated explicitly here. Which ones
// are emitted is controlled by the HAVE_16BPP, HAVE_32BPP and COMPONENT_GL
// build macros (presumably provided by build-info.hh / components.hh --
// confirm).
387#if HAVE_16BPP
388template class CharacterConverter<uint16_t>;
389#endif
390#if HAVE_32BPP || COMPONENT_GL
391template class CharacterConverter<uint32_t>;
392#endif
393
394} // namespace openmsx
void convertLine(std::span< Pixel > buf, int line)
Convert a line of V9938 VRAM to 256 or 512 host pixels.
CharacterConverter(VDP &vdp, std::span< const Pixel, 16 > palFg, std::span< const Pixel, 16 > palBg)
Create a new bitmap scanline converter.
void setDisplayMode(DisplayMode mode)
Select the display mode to use for scanline conversion.
Represents a VDP display mode.
Definition: DisplayMode.hh:16
constexpr byte getBase() const
Get the base display mode as an integer: M5..M1 combined.
Definition: DisplayMode.hh:108
Unified implementation of MSX Video Display Processors (VDPs).
Definition: VDP.hh:64
This file implemented 3 utility functions:
Definition: Autofire.cc:9
uint32_t Pixel
constexpr void fill(ForwardRange &&range, const T &value)
Definition: ranges.hh:287
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
Definition: xrange.hh:147
constexpr auto xrange(T e)
Definition: xrange.hh:132