openMSX
SymbolManager.cc
Go to the documentation of this file.
1#include "SymbolManager.hh"
2
4#include "File.hh"
5#include "Interpreter.hh"
6#include "TclObject.hh"
7
8#include "narrow.hh"
9#include "ranges.hh"
10#include "static_vector.hh"
11#include "stl.hh"
12#include "StringOp.hh"
13#include "unreachable.hh"
14#include "view.hh"
15
16#include <bit>
17#include <cassert>
18#include <fstream>
19
20namespace openmsx {
21
23{
24 switch (type) {
25 using enum Type;
26 case AUTO_DETECT: return "auto-detect";
27 case ASMSX: return "asMSX";
28 case GENERIC: return "generic";
29 case HTC: return "htc";
30 case LINKMAP: return "linkmap";
31 case NOICE: return "NoICE";
32 case VASM: return "vasm";
33 case WLALINK_NOGMB: return "wlalink";
34 default: UNREACHABLE;
35 }
36}
37
38std::optional<SymbolFile::Type> SymbolFile::parseType(std::string_view str)
39{
40 using enum Type;
41 if (str == "auto-detect") return AUTO_DETECT;
42 if (str == "asMSX") return ASMSX;
43 if (str == "generic") return GENERIC;
44 if (str == "htc") return HTC;
45 if (str == "linkmap") return LINKMAP;
46 if (str == "NoICE") return NOICE;
47 if (str == "vasm") return VASM;
48 if (str == "wlalink") return WLALINK_NOGMB;
49 return {};
50}
51
52
54 : commandController(commandController_)
55{
56}
57
58// detection logic taken from old openmsx-debugger, could probably be improved.
59[[nodiscard]] SymbolFile::Type SymbolManager::detectType(std::string_view filename, std::string_view buffer)
60{
61 auto fname = StringOp::toLower(filename);
62
63 using enum SymbolFile::Type;
64 if (fname.ends_with(".noi")) {
65 // NoICE command file
66 return NOICE;
67 } else if (fname.ends_with(".map")) {
68 auto [line, _] = StringOp::splitOnFirst(buffer, "\n\r");
69 if (StringOp::containsCaseInsensitive(line, "hi-tech")) {
70 // HiTech link map file
71 return LINKMAP;
72 }
73 // map file output by the Z80ASM from Z88DK
74 return GENERIC;
75 } else if (fname.ends_with(".sym")) {
76 // auto detect which sym file
77 auto [line, _] = StringOp::splitOnFirst(buffer, "\n\r");
78 if (line.starts_with("; Symbol table")) {
79 return ASMSX;
80 } else if (StringOp::containsCaseInsensitive(line, " %equ ")) { // TNIASM1
81 return GENERIC;
82 } else if (StringOp::containsCaseInsensitive(line, " equ ")) {
83 return GENERIC;
84 } else if (StringOp::containsCaseInsensitive(line, "Sections:")) {
85 return VASM;
86 } else if (line.starts_with("; this file was created with wlalink")) {
87 return WLALINK_NOGMB;
88 } else {
89 // this is a blunt conclusion but I don't know a way
90 // to detect this file type
91 return HTC;
92 }
93 } else if (fname.ends_with(".symbol") || fname.ends_with(".publics") || fname.ends_with(".sys")) {
94 /* They are the same type of file. For some reason the Debian
95 * manpage uses the extension ".sys"
96 * pasmo doc -> pasmo [options] file.asm file.bin [file.symbol [file.publics] ]
97 * pasmo manpage in Debian -> pasmo [options] file.asm file.bin [file.sys]
98 */
99 return GENERIC; // pasmo
100 }
101 return GENERIC;
102}
103
105 std::string_view filename, std::string_view buffer, SymbolFile::Type type,
106 function_ref<std::optional<Symbol>(std::span<std::string_view>)> lineParser)
107{
108 SymbolFile result;
109 result.filename = filename;
110 result.type = type;
111
112 static constexpr std::string_view whitespace = " \t\r";
113 for (std::string_view fullLine : StringOp::split_view(buffer, '\n')) {
114 auto [line, _] = StringOp::splitOnFirst(fullLine, ';');
115
117 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 3 + 1)};
118 if (auto symbol = lineParser(tokens)) {
119 result.symbols.push_back(std::move(*symbol));
120 }
121 }
122
123 return result;
124}
125
126template<typename T>
127[[nodiscard]] std::optional<T> SymbolManager::parseValue(std::string_view str)
128{
129 if (str.ends_with('h') || str.ends_with('H')) { // hex
130 str.remove_suffix(1);
131 return StringOp::stringToBase<16, T>(str);
132 }
133 if (str.starts_with('$') || str.starts_with('#')) { // hex
134 str.remove_prefix(1);
135 return StringOp::stringToBase<16, T>(str);
136 }
137 if (str.starts_with('%')) { // bin
138 str.remove_prefix(1);
139 return StringOp::stringToBase<2, T>(str);
140 }
141 // this recognizes the prefixes "0x" or "0X" (for hexadecimal)
142 // and "0b" or "0B" (for binary)
143 // no prefix in interpreted as decimal
144 // "0" as a prefix for octal is intentionally NOT supported
145 return StringOp::stringTo<T>(str);
146}
147
148// explicitly instantiate for uint16_t and uint32_t (needed for unittest)
149template std::optional<uint16_t> SymbolManager::parseValue<uint16_t>(std::string_view);
150template std::optional<uint32_t> SymbolManager::parseValue<uint32_t>(std::string_view);
151
152[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabel(std::string_view label, uint32_t value)
153{
154 if (label.ends_with(':')) label.remove_suffix(1);
155 if (label.empty()) return {};
156
157 auto tmp{value > 0xFFFF ? std::optional<uint16_t>(static_cast<uint16_t>(value >> 16)) : std::nullopt};
158 return Symbol{std::string(label), static_cast<uint16_t>(value), {}, tmp};
159}
160
161[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabelAndValue(std::string_view label, std::string_view value)
162{
163 if (auto num = parseValue<uint16_t>(value)) {
164 return checkLabel(label, *num);
165 }
166 return {};
167}
168
169[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabelSegmentAndValue(std::string_view label, std::string_view value)
170{
171 if (auto num = parseValue<uint32_t>(value)) {
172 return checkLabel(label, *num);
173 }
174 return {};
175}
176
177[[nodiscard]] SymbolFile SymbolManager::loadGeneric(std::string_view filename, std::string_view buffer)
178{
179 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
180 if (tokens.size() != 3) return {};
181 auto label = tokens[0];
182 auto equ = tokens[1];
183 auto value = tokens[2];
185 if (!cmp(equ, "equ") && // TNIASM0, PASMO, SJASM, ...
186 !cmp(equ, "%equ") && // TNIASM1
187 (equ != "=")) return {}; // Z80ASM map file (Z88DK)
188 return checkLabelAndValue(label, value);
189 };
190 return loadLines(filename, buffer, SymbolFile::Type::GENERIC, parseLine);
191}
192
193[[nodiscard]] SymbolFile SymbolManager::loadNoICE(std::string_view filename, std::string_view buffer)
194{
195 bool anySegment = false;
196 auto parseLine = [&](std::span<std::string_view> tokens) -> std::optional<Symbol> {
197 if (tokens.size() != 3) return {};
198 auto def = tokens[0];
199 auto label = tokens[1];
200 auto value = tokens[2];
201 if (StringOp::casecmp cmp; !cmp(def, "def")) return {};
202 // detecting segment information above 16bits
203 auto symbol = checkLabelSegmentAndValue(label, value);
204 anySegment |= symbol->segment.has_value();
205 return symbol;
206 };
207 auto file = loadLines(filename, buffer, SymbolFile::Type::NOICE, parseLine);
208 // Heuristic: if all segments in the symbol file are 0,
209 // then assume the file contains no segment information.
210 if (anySegment) {
211 for (auto& symbol: file.getSymbols()) {
212 if (!symbol.segment) symbol.segment = 0;
213 }
214 }
215 return file;
216}
217
218[[nodiscard]] SymbolFile SymbolManager::loadHTC(std::string_view filename, std::string_view buffer)
219{
220 // TODO check with real HTC file
221 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
222 if (tokens.size() != 3) return {};
223 auto label = tokens[0];
224 auto value = tokens[1];
225 // tokens[2] ???
226
227 auto val = StringOp::stringToBase<16, uint16_t>(value);
228 if (!val) return {};
229 return checkLabel(label, *val);
230 };
231 return loadLines(filename, buffer, SymbolFile::Type::HTC, parseLine);
232}
233
234[[nodiscard]] SymbolFile SymbolManager::loadVASM(std::string_view filename, std::string_view buffer)
235{
236 SymbolFile result;
237 result.filename = filename;
239
240 static constexpr std::string_view whitespace = " \t\r";
241 bool skipLines = true;
242 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
243 if (skipLines) {
244 if (line.starts_with("Symbols by value:")) {
245 skipLines = false;
246 }
247 continue;
248 }
249
251 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 2 + 1)};
252 if (tokens.size() != 2) continue;
253 auto value = tokens[0];
254 auto label = tokens[1];
255
256 if (auto val = StringOp::stringToBase<16, uint16_t>(value)) {
257 if (auto symbol = checkLabel(label, *val)) {
258 result.symbols.push_back(std::move(*symbol));
259 }
260 }
261 }
262
263 return result;
264}
265
266[[nodiscard]] SymbolFile SymbolManager::loadNoGmb(std::string_view filename, std::string_view buffer)
267{
268 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
269 if (tokens.size() != 2) return {};
270 auto value = tokens[0];
271 auto label = tokens[1];
272 if (!value.starts_with("00:")) return {};
273 std::optional<uint16_t> num = StringOp::stringToBase<16, uint16_t>(value.substr(3));
274 if (!num.has_value()) return {};
275 return checkLabel(label, num.value());
276 };
277 return loadLines(filename, buffer, SymbolFile::Type::WLALINK_NOGMB, parseLine);
278}
279
280[[nodiscard]] SymbolFile SymbolManager::loadASMSX(std::string_view filename, std::string_view buffer)
281{
282 SymbolFile result;
283 result.filename = filename;
285
286 static constexpr std::string_view whitespace = " \t\r";
287 bool symbolPart = false;
288 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
289 if (line.starts_with(';')) {
290 if (line.starts_with("; global and local")) {
291 symbolPart = true;
292 } else if (line.starts_with("; other")) {
293 symbolPart = false;
294 }
295 continue;
296 }
297 if (!symbolPart) continue;
298
299 // Possible formats are: (checked in: https://github.com/Fubukimaru/asMSX/blob/master/src/dura.y#L3987)
300 // <abcd>h <name> with <abcd> a 4-digit hex value
301 // <xy>h:<abcd>h <name> <xy> a 2-digit hex indicating the MegaRom Page (ignored)
302 // <name> the symbol name
304 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 2 + 1)};
305 if (tokens.size() != 2) continue;
306 auto value = tokens[0];
307 auto label = tokens[1];
308
309 auto [f, l] = StringOp::splitOnFirst(value, ':');
310 value = l.empty() ? f : l;
311
312 if (auto symbol = checkLabelAndValue(label, value)) {
313 result.symbols.push_back(std::move(*symbol));
314 }
315 }
316
317 return result;
318}
319
320[[nodiscard]] std::optional<unsigned> SymbolManager::isHexDigit(char c)
321{
322 if ('0' <= c && c <= '9') return c - '0';
323 if ('A' <= c && c <= 'F') return c - 'A' + 10;
324 if ('a' <= c && c <= 'f') return c - 'a' + 10;
325 return {};
326}
327[[nodiscard]] std::optional<uint16_t> SymbolManager::is4DigitHex(std::string_view s)
328{
329 if (s.size() != 4) return {};
330 unsigned value = 0;
331 for (int i = 0; i < 4; ++i) {
332 auto digit = isHexDigit(s[i]);
333 if (!digit) return {};
334 value = (value << 4) | *digit;
335 }
336 return narrow<uint16_t>(value);
337}
338
339[[nodiscard]] SymbolFile SymbolManager::loadLinkMap(std::string_view filename, std::string_view buffer)
340{
341 // Hi-Tech C link map file. Here's an example of such a file:
342 // https://github.com/artrag/C-experiments-for-msx/blob/master/START.MAP
343 SymbolFile result;
344 result.filename = filename;
346
347 static constexpr std::string_view whitespace = " \t\r";
348 bool symbolPart = false;
349 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
350 if (!symbolPart) {
351 if (line.contains("Symbol Table")) {
352 symbolPart = true;
353 }
354 continue;
355 }
356 // Here's an example of a few lines:
357 // asllmod text 2CE7 asllsub text 0AEE
358 // cret text 2E58 csv text 2E4C
359 // float_or_long_used (abs) 0001 indir text 2E5F
360 // Note:
361 // * Multiple (2 in this case) symbols are defined in a single line.
362 // * The width of the columns seems to be the same within a single file, but not across files (?)
363 // * Looking at a single symbol:
364 // * There are 3 columns: name, psect, value
365 // * BUT the psect column can be empty!!!
366 // This in combination with an unknown column-width makes parsing difficult.
367 // The heuristic we use is that the last column must match: [0-9A-Fa-f]{4}
368 auto tokens = StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace);
369 auto it = tokens.begin();
370 auto et = tokens.end();
371 while (it != et) {
372 auto label = *it++;
373
374 if (it == et) break;
375 auto value = *it++; // this could either be the psect or the value column
376 if (auto val = is4DigitHex(value)) {
377 result.symbols.emplace_back(std::string(label), *val, std::nullopt, std::nullopt);
378 continue;
379 }
380
381 if (it == et) break;
382 value = *it++; // try again with 3rd column
383 auto val = is4DigitHex(value);
384 if (!val) break; // if this also doesn't work there's something wrong, skip this line
385 result.symbols.emplace_back(std::string(label), *val, std::nullopt, std::nullopt);
386 }
387 }
388
389 return result;
390}
391
392[[nodiscard]] SymbolFile SymbolManager::loadSymbolFile(const std::string& filename, SymbolFile::Type type, std::optional<uint8_t> slot)
393{
394 File file(filename);
395 auto buf = file.mmap();
396 std::string_view buffer(std::bit_cast<const char*>(buf.data()), buf.size());
397
398 using enum SymbolFile::Type;
399 if (type == AUTO_DETECT) {
400 type = detectType(filename, buffer);
401 }
402 assert(type != AUTO_DETECT);
403
404 auto symbolFile = [&]{
405 switch (type) {
406 case ASMSX:
407 return loadASMSX(filename, buffer);
408 case GENERIC:
409 return loadGeneric(filename, buffer);
410 case HTC:
411 return loadHTC(filename, buffer);
412 case LINKMAP:
413 return loadLinkMap(filename, buffer);
414 case NOICE:
415 return loadNoICE(filename, buffer);
416 case VASM:
417 return loadVASM(filename, buffer);
418 case WLALINK_NOGMB:
419 return loadNoGmb(filename, buffer);
420 default: UNREACHABLE;
421 }
422 }();
423
424 // Update slot info for the file and each of its symbol
425 symbolFile.slot = slot;
426 for (auto& symbol: symbolFile.getSymbols()) {
427 symbol.slot = slot;
428 }
429
430 return symbolFile;
431}
432
433void SymbolManager::refresh()
434{
435 // Drop caches
436 lookupValueCache.clear();
437
438 // Allow to access symbol-values in Tcl expression with syntax: $sym(JIFFY)
439 auto& interp = commandController.getInterpreter();
440 TclObject arrayName("sym");
441 interp.unsetVariable(arrayName.getString().c_str());
442 for (const auto& file : files) {
443 for (const auto& sym : file.symbols) {
444 interp.setVariable(arrayName, TclObject(sym.name), TclObject(sym.value));
445 }
446 }
447
448 if (observer) observer->notifySymbolsChanged();
449}
450
451bool SymbolManager::reloadFile(const std::string& filename, LoadEmpty loadEmpty, SymbolFile::Type type, std::optional<uint8_t> slot)
452{
453 auto file = loadSymbolFile(filename, type, slot); // might throw
454 if (file.symbols.empty() && loadEmpty == LoadEmpty::NOT_ALLOWED) return false;
455
456 if (auto it = ranges::find(files, filename, &SymbolFile::filename);
457 it == files.end()) {
458 files.push_back(std::move(file));
459 } else {
460 *it = std::move(file);
461 }
462 refresh();
463 return true;
464}
465
466void SymbolManager::removeFile(std::string_view filename)
467{
468 auto it = ranges::find(files, filename, &SymbolFile::filename);
469 if (it == files.end()) return; // not found
470 files.erase(it);
471 refresh();
472}
473
475{
476 files.clear();
477 refresh();
478}
479
480std::optional<uint16_t> SymbolManager::lookupSymbol(std::string_view str) const
481{
482 // linear search is fine: only used interactively
483 // prefer an exact match
484 for (const auto& file : files) {
485 if (auto it = ranges::find(file.symbols, str, &Symbol::name);
486 it != file.symbols.end()) {
487 return it->value;
488 }
489 }
490 // but if not found, a case-insensitive match is fine as well
491 for (const auto& file : files) {
492 if (auto it = ranges::find_if(file.symbols, [&](const auto& sym) {
493 return StringOp::casecmp{}(str, sym.name); });
494 it != file.symbols.end()) {
495 return it->value;
496 }
497 }
498 return {};
499}
500
501std::optional<uint16_t> SymbolManager::parseSymbolOrValue(std::string_view str) const
502{
503 // first try symbol
504 if (auto r = lookupSymbol(str)) return r;
505 // if not found, then try to parse as a numerical value
506 return parseValue<uint16_t>(str);
507}
508
509std::span<Symbol const * const> SymbolManager::lookupValue(uint16_t value)
510{
511 if (lookupValueCache.empty()) {
512 for (const auto& file : files) {
513 for (const auto& sym : file.symbols) {
514 auto [it, inserted] = lookupValueCache.try_emplace(sym.value, std::vector<const Symbol*>{});
515 it->second.push_back(&sym);
516 }
517 }
518 }
519 if (auto* sym = lookup(lookupValueCache, value)) {
520 return *sym;
521 }
522 return {};
523}
524
525SymbolFile* SymbolManager::findFile(std::string_view filename)
526{
527 if (auto it = ranges::find(files, filename, &SymbolFile::filename); it == files.end()) {
528 return nullptr;
529 } else {
530 return std::to_address(it);
531 }
532}
533
534std::string SymbolManager::getFileFilters()
535{
536 return "Auto-detect file type (*){.*},"
537 "asMSX 0.x symbol files (*.sym){.sym},"
538 "HiTech C link map files (*.map){.map},"
539 "HiTech C symbol files (*.sym){.sym},"
540 "NoICE command files (*.noi){.noi},"
541 "pasmo symbol files (*.symbol *.publics *.sys){.symbol,.publics,.sys},"
542 "tniASM 0.x symbol files (*.sym){.sym},"
543 "tniASM 1.x symbol files (*.sym){.sym},"
544 "vasm symbol files (*.sym){.sym},"
545 "wlalink no$gmb symbol files (*.sym){.sym}";
546}
547
548SymbolFile::Type SymbolManager::getTypeForFilter(std::string_view filter)
549{
550 using enum SymbolFile::Type;
551 if (filter.starts_with("Auto")) {
552 return AUTO_DETECT;
553 } else if (filter.starts_with("asMSX")) {
554 return ASMSX;
555 } else if (filter.starts_with("HiTechC link")) {
556 return LINKMAP;
557 } else if (filter.starts_with("HiTechC symbol")) {
558 return HTC;
559 } else if (filter.starts_with("NoICE")) {
560 return NOICE;
561 } else if (filter.starts_with("vasm")) {
562 return VASM;
563 } else if (filter.starts_with("wlalink")) {
564 return WLALINK_NOGMB;
565 } else {
566 return GENERIC;
567 }
568}
569
570} // namespace openmsx
virtual Interpreter & getInterpreter()=0
std::span< const uint8_t > mmap()
Map file in memory.
Definition File.cc:102
static std::optional< unsigned > isHexDigit(char c)
static SymbolFile loadLines(std::string_view filename, std::string_view buffer, SymbolFile::Type type, function_ref< std::optional< Symbol >(std::span< std::string_view >)> lineParser)
static std::optional< Symbol > checkLabelSegmentAndValue(std::string_view label, std::string_view value)
static SymbolFile loadNoGmb(std::string_view filename, std::string_view buffer)
static SymbolFile::Type detectType(std::string_view filename, std::string_view buffer)
static SymbolFile loadNoICE(std::string_view filename, std::string_view buffer)
static SymbolFile loadHTC(std::string_view filename, std::string_view buffer)
static SymbolFile loadGeneric(std::string_view filename, std::string_view buffer)
static SymbolFile loadSymbolFile(const std::string &filename, SymbolFile::Type type, std::optional< uint8_t > slot={})
static SymbolFile loadVASM(std::string_view filename, std::string_view buffer)
static std::optional< uint16_t > is4DigitHex(std::string_view s)
void removeFile(std::string_view filename)
std::optional< uint16_t > lookupSymbol(std::string_view s) const
static std::optional< Symbol > checkLabel(std::string_view label, uint32_t value)
bool reloadFile(const std::string &filename, LoadEmpty loadEmpty, SymbolFile::Type type, std::optional< uint8_t > slot={})
static SymbolFile loadASMSX(std::string_view filename, std::string_view buffer)
static SymbolFile loadLinkMap(std::string_view filename, std::string_view buffer)
static std::optional< T > parseValue(std::string_view str)
SymbolManager(CommandController &commandController)
static std::optional< Symbol > checkLabelAndValue(std::string_view label, std::string_view value)
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition hash_map.hh:118
std::pair< string_view, string_view > splitOnFirst(string_view str, string_view chars)
Definition StringOp.cc:95
std::string toLower(std::string_view str)
Definition StringOp.cc:26
bool containsCaseInsensitive(std::string_view haystack, std::string_view needle)
Definition StringOp.hh:181
auto split_view(std::string_view str, Separators separators)
Definition StringOp.hh:83
This file implemented 3 utility functions:
Definition Autofire.cc:11
auto find_if(InputRange &&range, UnaryPredicate pred)
Definition ranges.hh:175
auto find(InputRange &&range, const T &value)
Definition ranges.hh:162
constexpr auto take(ForwardRange &&range, size_t n)
Definition view.hh:465
constexpr from_range_t from_range
static std::optional< Type > parseType(std::string_view str)
static zstring_view toString(Type type)
std::vector< Symbol > symbols
virtual void notifySymbolsChanged()=0
std::string name
#define UNREACHABLE