openMSX
SymbolManager.cc
Go to the documentation of this file.
1#include "SymbolManager.hh"
2
4#include "File.hh"
5#include "Interpreter.hh"
6#include "TclObject.hh"
7
8#include "narrow.hh"
9#include "ranges.hh"
10#include "static_vector.hh"
11#include "stl.hh"
12#include "StringOp.hh"
13#include "unreachable.hh"
14#include "view.hh"
15
16#include <bit>
17#include <cassert>
18#include <fstream>
19
20namespace openmsx {
21
23{
24 switch (type) {
25 using enum Type;
26 case AUTO_DETECT: return "auto-detect";
27 case ASMSX: return "asMSX";
28 case GENERIC: return "generic";
29 case HTC: return "htc";
30 case LINKMAP: return "linkmap";
31 case NOICE: return "NoICE";
32 case VASM: return "vasm";
33 default: UNREACHABLE;
34 }
35}
36
37std::optional<SymbolFile::Type> SymbolFile::parseType(std::string_view str)
38{
39 using enum Type;
40 if (str == "auto-detect") return AUTO_DETECT;
41 if (str == "asMSX") return ASMSX;
42 if (str == "generic") return GENERIC;
43 if (str == "htc") return HTC;
44 if (str == "linkmap") return LINKMAP;
45 if (str == "NoICE") return NOICE;
46 if (str == "vasm") return VASM;
47 return {};
48}
49
50
52 : commandController(commandController_)
53{
54}
55
56// detection logic taken from old openmsx-debugger, could probably be improved.
57[[nodiscard]] SymbolFile::Type SymbolManager::detectType(std::string_view filename, std::string_view buffer)
58{
59 auto fname = StringOp::toLower(filename);
60
61 using enum SymbolFile::Type;
62 if (fname.ends_with(".noi")) {
63 // NoICE command file
64 return NOICE;
65 } else if (fname.ends_with(".map")) {
66 // HiTech link map file
67 return LINKMAP;
68 } else if (fname.ends_with(".sym")) {
69 // auto detect which sym file
70 auto [line, _] = StringOp::splitOnFirst(buffer, "\n\r");
71 if (line.starts_with("; Symbol table")) {
72 return ASMSX;
73 } else if (StringOp::containsCaseInsensitive(line, " %equ ")) { // TNIASM1
74 return GENERIC;
75 } else if (StringOp::containsCaseInsensitive(line, " equ ")) {
76 return GENERIC;
77 } else if (StringOp::containsCaseInsensitive(line, "Sections:")) {
78 return VASM;
79 } else {
80 // this is a blunt conclusion but I don't know a way
81 // to detect this file type
82 return HTC;
83 }
84 } else if (fname.ends_with(".symbol") || fname.ends_with(".publics") || fname.ends_with(".sys")) {
85 /* They are the same type of file. For some reason the Debian
86 * manpage uses the extension ".sys"
87 * pasmo doc -> pasmo [options] file.asm file.bin [file.symbol [file.publics] ]
88 * pasmo manpage in Debian -> pasmo [options] file.asm file.bin [file.sys]
89 */
90 return GENERIC; // pasmo
91 }
92 return GENERIC;
93}
94
96 std::string_view filename, std::string_view buffer, SymbolFile::Type type,
97 function_ref<std::optional<Symbol>(std::span<std::string_view>)> lineParser)
98{
99 SymbolFile result;
100 result.filename = filename;
101 result.type = type;
102
103 static constexpr std::string_view whitespace = " \t\r";
104 for (std::string_view fullLine : StringOp::split_view(buffer, '\n')) {
105 auto [line, _] = StringOp::splitOnFirst(fullLine, ';');
106
108 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 3 + 1)};
109 if (auto symbol = lineParser(tokens)) {
110 result.symbols.push_back(std::move(*symbol));
111 }
112 }
113
114 return result;
115}
116
117[[nodiscard]] std::optional<uint16_t> SymbolManager::parseValue(std::string_view str)
118{
119 if (str.ends_with('h') || str.ends_with('H')) { // hex
120 str.remove_suffix(1);
121 return StringOp::stringToBase<16, uint16_t>(str);
122 }
123 if (str.starts_with('$') || str.starts_with('#')) { // hex
124 str.remove_prefix(1);
125 return StringOp::stringToBase<16, uint16_t>(str);
126 }
127 if (str.starts_with('%')) { // bin
128 str.remove_prefix(1);
129 return StringOp::stringToBase<2, uint16_t>(str);
130 }
131 // this recognizes the prefixes "0x" or "0X" (for hexadecimal)
132 // and "0b" or "0B" (for binary)
133 // no prefix in interpreted as decimal
134 // "0" as a prefix for octal is intentionally NOT supported
135 return StringOp::stringTo<uint16_t>(str);
136}
137
138[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabel(std::string_view label, uint16_t value)
139{
140 if (label.ends_with(':')) label.remove_suffix(1);
141 if (label.empty()) return {};
142
143 return Symbol{std::string(label), value};
144}
145
146[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabelAndValue(std::string_view label, std::string_view value)
147{
148 if (auto num = parseValue(value)) {
149 return checkLabel(label, *num);
150 }
151 return {};
152}
153
154[[nodiscard]] SymbolFile SymbolManager::loadGeneric(std::string_view filename, std::string_view buffer)
155{
156 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
157 if (tokens.size() != 3) return {};
158 auto label = tokens[0];
159 auto equ = tokens[1];
160 auto value = tokens[2];
162 if (!cmp(equ, "equ") && // TNIASM0, PASMO, SJASM, ...
163 !cmp(equ, "%equ")) return {}; // TNIASM1
164 return checkLabelAndValue(label, value);
165 };
166 return loadLines(filename, buffer, SymbolFile::Type::GENERIC, parseLine);
167}
168
169[[nodiscard]] SymbolFile SymbolManager::loadNoICE(std::string_view filename, std::string_view buffer)
170{
171 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
172 if (tokens.size() != 3) return {};
173 auto def = tokens[0];
174 auto label = tokens[1];
175 auto value = tokens[2];
176 if (StringOp::casecmp cmp; !cmp(def, "def")) return {};
177 return checkLabelAndValue(label, value);
178 };
179 return loadLines(filename, buffer, SymbolFile::Type::NOICE, parseLine);
180}
181
182[[nodiscard]] SymbolFile SymbolManager::loadHTC(std::string_view filename, std::string_view buffer)
183{
184 // TODO check with real HTC file
185 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
186 if (tokens.size() != 3) return {};
187 auto label = tokens[0];
188 auto value = tokens[1];
189 // tokens[2] ???
190
191 auto val = StringOp::stringToBase<16, uint16_t>(value);
192 if (!val) return {};
193 return checkLabel(label, *val);
194 };
195 return loadLines(filename, buffer, SymbolFile::Type::HTC, parseLine);
196}
197
198[[nodiscard]] SymbolFile SymbolManager::loadVASM(std::string_view filename, std::string_view buffer)
199{
200 SymbolFile result;
201 result.filename = filename;
203
204 static constexpr std::string_view whitespace = " \t\r";
205 bool skipLines = true;
206 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
207 if (skipLines) {
208 if (line.starts_with("Symbols by value:")) {
209 skipLines = false;
210 }
211 continue;
212 }
213
215 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 2 + 1)};
216 if (tokens.size() != 2) continue;
217 auto value = tokens[0];
218 auto label = tokens[1];
219
220 if (auto val = StringOp::stringToBase<16, uint16_t>(value)) {
221 if (auto symbol = checkLabel(label, *val)) {
222 result.symbols.push_back(std::move(*symbol));
223 }
224 }
225 }
226
227 return result;
228}
229
230[[nodiscard]] SymbolFile SymbolManager::loadASMSX(std::string_view filename, std::string_view buffer)
231{
232 SymbolFile result;
233 result.filename = filename;
235
236 static constexpr std::string_view whitespace = " \t\r";
237 bool symbolPart = false;
238 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
239 if (line.starts_with(';')) {
240 if (line.starts_with("; global and local")) {
241 symbolPart = true;
242 } else if (line.starts_with("; other")) {
243 symbolPart = false;
244 }
245 continue;
246 }
247 if (!symbolPart) continue;
248
249 // Possible formats are: (checked in: https://github.com/Fubukimaru/asMSX/blob/master/src/dura.y#L3987)
250 // <abcd>h <name> with <abcd> a 4-digit hex value
251 // <xy>h:<abcd>h <name> <xy> a 2-digit hex indicating the MegaRom Page (ignored)
252 // <name> the symbol name
254 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 2 + 1)};
255 if (tokens.size() != 2) continue;
256 auto value = tokens[0];
257 auto label = tokens[1];
258
259 auto [f, l] = StringOp::splitOnFirst(value, ':');
260 value = l.empty() ? f : l;
261
262 if (auto symbol = checkLabelAndValue(label, value)) {
263 result.symbols.push_back(std::move(*symbol));
264 }
265 }
266
267 return result;
268}
269
270[[nodiscard]] std::optional<unsigned> SymbolManager::isHexDigit(char c)
271{
272 if ('0' <= c && c <= '9') return c - '0';
273 if ('A' <= c && c <= 'F') return c - 'A' + 10;
274 if ('a' <= c && c <= 'f') return c - 'a' + 10;
275 return {};
276}
277[[nodiscard]] std::optional<uint16_t> SymbolManager::is4DigitHex(std::string_view s)
278{
279 if (s.size() != 4) return {};
280 unsigned value = 0;
281 for (int i = 0; i < 4; ++i) {
282 auto digit = isHexDigit(s[i]);
283 if (!digit) return {};
284 value = (value << 4) | *digit;
285 }
286 return narrow<uint16_t>(value);
287}
288
289[[nodiscard]] SymbolFile SymbolManager::loadLinkMap(std::string_view filename, std::string_view buffer)
290{
291 // Hi-Tech C link map file. Here's an example of such a file:
292 // https://github.com/artrag/C-experiments-for-msx/blob/master/START.MAP
293 SymbolFile result;
294 result.filename = filename;
296
297 static constexpr std::string_view whitespace = " \t\r";
298 bool symbolPart = false;
299 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
300 if (!symbolPart) {
301 if (line.find("Symbol Table") != std::string_view::npos) { // c++23 contains()
302 symbolPart = true;
303 }
304 continue;
305 }
306 // Here's an example of a few lines:
307 // asllmod text 2CE7 asllsub text 0AEE
308 // cret text 2E58 csv text 2E4C
309 // float_or_long_used (abs) 0001 indir text 2E5F
310 // Note:
311 // * Multiple (2 in this case) symbols are defined in a single line.
312 // * The width of the columns seems to be the same within a single file, but not across files (?)
313 // * Looking at a single symbol:
314 // * There are 3 columns: name, psect, value
315 // * BUT the psect column can be empty!!!
316 // This in combination with an unknown column-width makes parsing difficult.
317 // The heuristic we use is that the last column must match: [0-9A-Fa-f]{4}
318 auto tokens = StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace);
319 auto it = tokens.begin();
320 auto et = tokens.end();
321 while (it != et) {
322 auto label = *it++;
323
324 if (it == et) break;
325 auto value = *it++; // this could either be the psect or the value column
326 if (auto val = is4DigitHex(value)) {
327 result.symbols.emplace_back(std::string(label), *val);
328 continue;
329 }
330
331 if (it == et) break;
332 value = *it++; // try again with 3rd column
333 auto val = is4DigitHex(value);
334 if (!val) break; // if this also doesn't work there's something wrong, skip this line
335 result.symbols.emplace_back(std::string(label), *val);
336 }
337 }
338
339 return result;
340}
341
342[[nodiscard]] SymbolFile SymbolManager::loadSymbolFile(const std::string& filename, SymbolFile::Type type)
343{
344 File file(filename);
345 auto buf = file.mmap();
346 std::string_view buffer(std::bit_cast<const char*>(buf.data()), buf.size());
347
348 using enum SymbolFile::Type;
349 if (type == AUTO_DETECT) {
350 type = detectType(filename, buffer);
351 }
352 assert(type != AUTO_DETECT);
353
354 switch (type) {
355 case ASMSX:
356 return loadASMSX(filename, buffer);
357 case GENERIC:
358 return loadGeneric(filename, buffer);
359 case HTC:
360 return loadHTC(filename, buffer);
361 case LINKMAP:
362 return loadLinkMap(filename, buffer);
363 case NOICE:
364 return loadNoICE(filename, buffer);
365 case VASM:
366 return loadVASM(filename, buffer);
367 default: UNREACHABLE;
368 }
369}
370
371void SymbolManager::refresh()
372{
373 // Drop caches
374 lookupValueCache.clear();
375
376 // Allow to access symbol-values in Tcl expression with syntax: $sym(JIFFY)
377 auto& interp = commandController.getInterpreter();
378 TclObject arrayName("sym");
379 interp.unsetVariable(arrayName.getString().c_str());
380 for (const auto& file : files) {
381 for (const auto& sym : file.symbols) {
382 interp.setVariable(arrayName, TclObject(sym.name), TclObject(sym.value));
383 }
384 }
385
386 if (observer) observer->notifySymbolsChanged();
387}
388
389bool SymbolManager::reloadFile(const std::string& filename, LoadEmpty loadEmpty, SymbolFile::Type type)
390{
391 auto file = loadSymbolFile(filename, type); // might throw
392 if (file.symbols.empty() && loadEmpty == LoadEmpty::NOT_ALLOWED) return false;
393
394 if (auto it = ranges::find(files, filename, &SymbolFile::filename);
395 it == files.end()) {
396 files.push_back(std::move(file));
397 } else {
398 *it = std::move(file);
399 }
400 refresh();
401 return true;
402}
403
404void SymbolManager::removeFile(std::string_view filename)
405{
406 auto it = ranges::find(files, filename, &SymbolFile::filename);
407 if (it == files.end()) return; // not found
408 files.erase(it);
409 refresh();
410}
411
// NOTE(review): the line carrying this function's signature (listing line
// 412) is absent from this rendering, so its name and return type cannot be
// confirmed here.
// The body forgets every loaded symbol file and then calls refresh().
413{
414 files.clear();
415 refresh();
416}
417
418std::optional<uint16_t> SymbolManager::parseSymbolOrValue(std::string_view str) const
419{
420 // linear search is fine: only used interactively
421 // prefer an exact match
422 for (const auto& file : files) {
423 if (auto it = ranges::find(file.symbols, str, &Symbol::name);
424 it != file.symbols.end()) {
425 return it->value;
426 }
427 }
428 // but if not found, a case-insensitive match is fine as well
429 for (const auto& file : files) {
430 if (auto it = ranges::find_if(file.symbols, [&](const auto& sym) {
431 return StringOp::casecmp{}(str, sym.name); });
432 it != file.symbols.end()) {
433 return it->value;
434 }
435 }
436 // also not found, then try to parse as a numerical value
437 return parseValue(str);
438}
439
440std::span<Symbol const * const> SymbolManager::lookupValue(uint16_t value)
441{
442 if (lookupValueCache.empty()) {
443 for (const auto& file : files) {
444 for (const auto& sym : file.symbols) {
445 auto [it, inserted] = lookupValueCache.try_emplace(sym.value, std::vector<const Symbol*>{});
446 it->second.push_back(&sym);
447 }
448 }
449 }
450 if (auto* sym = lookup(lookupValueCache, value)) {
451 return *sym;
452 }
453 return {};
454}
455
456std::string SymbolManager::getFileFilters()
457{
458 return "Auto-detect file type (*){.*},"
459 "asMSX 0.x symbol files (*.sym){.sym},"
460 "HiTech C link map files (*.map){.map},"
461 "HiTech C symbol files (*.sym){.sym},"
462 "NoICE command files (*.noi){.noi},"
463 "pasmo symbol files (*.symbol *.publics *.sys){.symbol,.publics,.sys},"
464 "tniASM 0.x symbol files (*.sym){.sym},"
465 "tniASM 1.x symbol files (*.sym){.sym},"
466 "vasm symbol files (*.sym){.sym}";
467}
468
469SymbolFile::Type SymbolManager::getTypeForFilter(std::string_view filter)
470{
471 using enum SymbolFile::Type;
472 if (filter.starts_with("Auto")) {
473 return AUTO_DETECT;
474 } else if (filter.starts_with("asMSX")) {
475 return ASMSX;
476 } else if (filter.starts_with("HiTechC link")) {
477 return LINKMAP;
478 } else if (filter.starts_with("HiTechC symbol")) {
479 return HTC;
480 } else if (filter.starts_with("NoICE")) {
481 return NOICE;
482 } else if (filter.starts_with("vasm")) {
483 return VASM;
484 } else {
485 return GENERIC;
486 }
487}
488
489} // namespace openmsx
virtual Interpreter & getInterpreter()=0
std::span< const uint8_t > mmap()
Map file in memory.
Definition File.cc:102
static std::optional< unsigned > isHexDigit(char c)
bool reloadFile(const std::string &filename, LoadEmpty loadEmpty, SymbolFile::Type type)
static SymbolFile loadLines(std::string_view filename, std::string_view buffer, SymbolFile::Type type, function_ref< std::optional< Symbol >(std::span< std::string_view >)> lineParser)
static SymbolFile::Type detectType(std::string_view filename, std::string_view buffer)
static SymbolFile loadNoICE(std::string_view filename, std::string_view buffer)
static SymbolFile loadHTC(std::string_view filename, std::string_view buffer)
static SymbolFile loadSymbolFile(const std::string &filename, SymbolFile::Type type)
static SymbolFile loadGeneric(std::string_view filename, std::string_view buffer)
static SymbolFile loadVASM(std::string_view filename, std::string_view buffer)
static std::optional< uint16_t > is4DigitHex(std::string_view s)
void removeFile(std::string_view filename)
static std::optional< Symbol > checkLabel(std::string_view label, uint16_t value)
static SymbolFile loadASMSX(std::string_view filename, std::string_view buffer)
static std::optional< uint16_t > parseValue(std::string_view str)
std::optional< uint16_t > parseSymbolOrValue(std::string_view s) const
static SymbolFile loadLinkMap(std::string_view filename, std::string_view buffer)
SymbolManager(CommandController &commandController)
static std::optional< Symbol > checkLabelAndValue(std::string_view label, std::string_view value)
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition hash_map.hh:118
std::pair< string_view, string_view > splitOnFirst(string_view str, string_view chars)
Definition StringOp.cc:95
std::string toLower(std::string_view str)
Definition StringOp.cc:26
bool containsCaseInsensitive(std::string_view haystack, std::string_view needle)
Definition StringOp.hh:181
auto split_view(std::string_view str, Separators separators)
Definition StringOp.hh:83
This file implemented 3 utility functions:
Definition Autofire.cc:11
auto find_if(InputRange &&range, UnaryPredicate pred)
Definition ranges.hh:175
auto find(InputRange &&range, const T &value)
Definition ranges.hh:162
constexpr auto take(ForwardRange &&range, size_t n)
Definition view.hh:544
constexpr from_range_t from_range
static std::optional< Type > parseType(std::string_view str)
static zstring_view toString(Type type)
std::vector< Symbol > symbols
virtual void notifySymbolsChanged()=0
std::string name
#define UNREACHABLE