openMSX
XSAExtractor.cc
Go to the documentation of this file.
1#include "XSAExtractor.hh"
2
3#include "MSXException.hh"
4
5#include "narrow.hh"
6#include "xrange.hh"
7
8namespace openmsx {
9
10XSAExtractor::XSAExtractor(std::span<const uint8_t> file_)
11 : file(file_)
12{
13 if ((charIn() != 'P') || (charIn() != 'C') ||
14 (charIn() != 'K') || (charIn() != '\010')) {
15 throw MSXException("Not an XSA image");
16 }
17
18 chkHeader();
19 initHufInfo(); // initialize the cpDist tables
20 unLz77();
21}
22
23std::vector<SectorBuffer> XSAExtractor::extractData() &&
24{
25 // destroys internal outBuf, but that's ok
26 return std::move(output);
27}
28
29// Get the next character from the input buffer
30uint8_t XSAExtractor::charIn()
31{
32 if (file.empty()) {
33 throw MSXException("Corrupt XSA image: unexpected end of file");
34 }
35 auto result = file.front();
36 file = file.subspan(1);
37 return result;
38}
39
40// check fileheader
41void XSAExtractor::chkHeader()
42{
43 // read original length (little endian)
44 unsigned outBufLen = 0;
45 for (auto i : xrange(4)) {
46 outBufLen |= charIn() << (8 * i);
47 }
48 auto sectors = (outBufLen + 511) / 512;
49 output.resize(sectors);
50
51 // skip compressed length
52 repeat(4, [&]{ (void)charIn(); });
53
54 // skip original filename
55 while (charIn()) /*empty*/;
56}
57
58// the actual decompression algorithm itself
59void XSAExtractor::unLz77()
60{
61 bitCnt = 0; // no bits read yet
62
63 size_t remaining = output.size() * sizeof(SectorBuffer);
64 std::span out{output.data()->raw.data(), remaining};
65 size_t outIdx = 0;
66 while (true) {
67 if (bitIn()) {
68 // 1-bit
69 unsigned strLen = rdStrLen();
70 if (strLen == (MAX_STR_LEN + 1)) {
71 return;
72 }
73 unsigned strPos = rdStrPos();
74 if ((strPos == 0) || (strPos > outIdx)) {
75 throw MSXException(
76 "Corrupt XSA image: invalid offset");
77 }
78 if (remaining < strLen) {
79 throw MSXException(
80 "Invalid XSA image: too small output buffer");
81 }
82 remaining -= strLen;
83 while (strLen--) {
84 out[outIdx] = out[outIdx - strPos];
85 ++outIdx;
86 }
87 } else {
88 // 0-bit
89 if (remaining == 0) {
90 throw MSXException(
91 "Invalid XSA image: too small output buffer");
92 }
93 --remaining;
94 out[outIdx++] = charIn();
95 }
96 }
97}
98
99// read string length
100unsigned XSAExtractor::rdStrLen()
101{
102 if (!bitIn()) return 2;
103 if (!bitIn()) return 3;
104 if (!bitIn()) return 4;
105
106 uint8_t nrBits = 2;
107 while ((nrBits != 7) && bitIn()) {
108 ++nrBits;
109 }
110
111 unsigned len = 1;
112 while (nrBits--) {
113 len = (len << 1) | (bitIn() ? 1 : 0);
114 }
115 return (len + 1);
116}
117
118// read string pos
119int XSAExtractor::rdStrPos()
120{
121 HufNode* hufPos = &hufTbl[2 * TBL_SIZE - 2];
122
123 while (hufPos->child1) {
124 if (bitIn()) {
125 hufPos = hufPos->child2;
126 } else {
127 hufPos = hufPos->child1;
128 }
129 }
130 auto cpdIndex = narrow<uint8_t>(hufPos - &hufTbl[0]);
131 ++tblSizes[cpdIndex];
132
133 auto getNBits = [&](unsigned n) {
134 assert(n <= 8);
135 uint8_t result = 0;
136 repeat(n, [&]{
137 result = uint8_t((result << 1) | (bitIn() ? 1 : 0));
138 });
139 return result;
140 };
141 int strPos = [&] {
142 if (cpdExt[cpdIndex] >= 8) {
143 uint8_t strPosLsb = charIn();
144 uint8_t strPosMsb = getNBits(narrow_cast<uint8_t>(cpdExt[cpdIndex] - 8));
145 return strPosLsb + 256 * strPosMsb;
146 } else {
147 return int(getNBits(cpdExt[cpdIndex]));
148 }
149 }();
150 if ((updHufCnt--) == 0) {
151 mkHufTbl(); // make the huffman table
152 }
153 return strPos + cpDist[cpdIndex];
154}
155
156// read a bit from the input file
157bool XSAExtractor::bitIn()
158{
159 if (bitCnt == 0) {
160 bitFlg = charIn(); // read bitFlg
161 bitCnt = 8; // 8 bits left
162 }
163 bool temp = bitFlg & 1;
164 --bitCnt; // 1 bit less
165 bitFlg >>= 1;
166
167 return temp;
168}
169
170// initialize the huffman info tables
171void XSAExtractor::initHufInfo()
172{
173 int offs = 1;
174 for (auto i : xrange(TBL_SIZE)) {
175 cpDist[i] = offs;
176 offs += 1 << cpdExt[i];
177 }
178 cpDist[TBL_SIZE] = offs;
179
180 for (auto i : xrange(TBL_SIZE)) {
181 tblSizes[i] = 0; // reset the table counters
182 hufTbl[i].child1 = nullptr; // mark the leave nodes
183 }
184 mkHufTbl(); // make the huffman table
185}
186
187// Make huffman coding info
188void XSAExtractor::mkHufTbl()
189{
190 // Initialize the huffman tree
191 HufNode* hufPos = &hufTbl[0];
192 for (auto i : xrange(TBL_SIZE)) {
193 (hufPos++)->weight = 1 + (tblSizes[i] >>= 1);
194 }
195 for (int i = TBL_SIZE; i != 2 * TBL_SIZE - 1; ++i) {
196 (hufPos++)->weight = -1;
197 }
198 // Place the nodes in the correct manner in the tree
199 while (hufTbl[2 * TBL_SIZE - 2].weight == -1) {
200 for (hufPos = &hufTbl[0]; !(hufPos->weight); ++hufPos) {
201 // nothing
202 }
203 HufNode* l1Pos = hufPos++;
204 while (!(hufPos->weight)) {
205 ++hufPos;
206 }
207 HufNode* l2Pos = [&] {
208 if (hufPos->weight < l1Pos->weight) {
209 auto* tmp = l1Pos;
210 l1Pos = hufPos++;
211 return tmp;
212 } else {
213 return hufPos++;
214 }
215 }();
216 int tempW;
217 while ((tempW = hufPos->weight) != -1) {
218 if (tempW) {
219 if (tempW < l1Pos->weight) {
220 l2Pos = l1Pos;
221 l1Pos = hufPos;
222 } else if (tempW < l2Pos->weight) {
223 l2Pos = hufPos;
224 }
225 }
226 ++hufPos;
227 }
228 hufPos->weight = l1Pos->weight + l2Pos->weight;
229 (hufPos->child1 = l1Pos)->weight = 0;
230 (hufPos->child2 = l2Pos)->weight = 0;
231 }
232 updHufCnt = MAX_HUF_CNT;
233}
234
235} // namespace openmsx
std::vector< SectorBuffer > extractData() &&
XSAExtractor(std::span< const uint8_t > file)
This file implemented 3 utility functions:
Definition Autofire.cc:11
constexpr void repeat(T n, Op op)
Repeat the given operation 'op' 'n' times.
Definition xrange.hh:147
constexpr auto xrange(T e)
Definition xrange.hh:132