openMSX
SymbolManager.cc
Go to the documentation of this file.
1#include "SymbolManager.hh"
2
4#include "File.hh"
5#include "Interpreter.hh"
6#include "TclObject.hh"
7
8#include "narrow.hh"
9#include "ranges.hh"
10#include "static_vector.hh"
11#include "stl.hh"
12#include "StringOp.hh"
13#include "unreachable.hh"
14#include "view.hh"
15
16#include <bit>
17#include <cassert>
18#include <fstream>
19
20namespace openmsx {
21
23{
24 switch (type) {
25 using enum Type;
26 case AUTO_DETECT: return "auto-detect";
27 case ASMSX: return "asMSX";
28 case GENERIC: return "generic";
29 case HTC: return "htc";
30 case LINKMAP: return "linkmap";
31 case NOICE: return "NoICE";
32 case VASM: return "vasm";
33 default: UNREACHABLE;
34 }
35}
36
37std::optional<SymbolFile::Type> SymbolFile::parseType(std::string_view str)
38{
39 using enum Type;
40 if (str == "auto-detect") return AUTO_DETECT;
41 if (str == "asMSX") return ASMSX;
42 if (str == "generic") return GENERIC;
43 if (str == "htc") return HTC;
44 if (str == "linkmap") return LINKMAP;
45 if (str == "NoICE") return NOICE;
46 if (str == "vasm") return VASM;
47 return {};
48}
49
50
52 : commandController(commandController_)
53{
54}
55
56// detection logic taken from old openmsx-debugger, could probably be improved.
57[[nodiscard]] SymbolFile::Type SymbolManager::detectType(std::string_view filename, std::string_view buffer)
58{
59 auto fname = StringOp::toLower(filename);
60
61 using enum SymbolFile::Type;
62 if (fname.ends_with(".noi")) {
63 // NoICE command file
64 return NOICE;
65 } else if (fname.ends_with(".map")) {
66 // HiTech link map file
67 return LINKMAP;
68 } else if (fname.ends_with(".sym")) {
69 // auto detect which sym file
70 auto [line, _] = StringOp::splitOnFirst(buffer, "\n\r");
71 if (line.starts_with("; Symbol table")) {
72 return ASMSX;
73 } else if (StringOp::containsCaseInsensitive(line, " %equ ")) { // TNIASM1
74 return GENERIC;
75 } else if (StringOp::containsCaseInsensitive(line, " equ ")) {
76 return GENERIC;
77 } else if (StringOp::containsCaseInsensitive(line, "Sections:")) {
78 return VASM;
79 } else {
80 // this is a blunt conclusion but I don't know a way
81 // to detect this file type
82 return HTC;
83 }
84 } else if (fname.ends_with(".symbol") || fname.ends_with(".publics") || fname.ends_with(".sys")) {
85 /* They are the same type of file. For some reason the Debian
86 * manpage uses the extension ".sys"
87 * pasmo doc -> pasmo [options] file.asm file.bin [file.symbol [file.publics] ]
88 * pasmo manpage in Debian -> pasmo [options] file.asm file.bin [file.sys]
89 */
90 return GENERIC; // pasmo
91 }
92 return GENERIC;
93}
94
96 std::string_view filename, std::string_view buffer, SymbolFile::Type type,
97 function_ref<std::optional<Symbol>(std::span<std::string_view>)> lineParser)
98{
99 SymbolFile result;
100 result.filename = filename;
101 result.type = type;
102
103 static constexpr std::string_view whitespace = " \t\r";
104 for (std::string_view fullLine : StringOp::split_view(buffer, '\n')) {
105 auto [line, _] = StringOp::splitOnFirst(fullLine, ';');
106
108 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 3 + 1)};
109 if (auto symbol = lineParser(tokens)) {
110 result.symbols.push_back(std::move(*symbol));
111 }
112 }
113
114 return result;
115}
116
117template<typename T>
118[[nodiscard]] std::optional<T> SymbolManager::parseValue(std::string_view str)
119{
120 if (str.ends_with('h') || str.ends_with('H')) { // hex
121 str.remove_suffix(1);
122 return StringOp::stringToBase<16, T>(str);
123 }
124 if (str.starts_with('$') || str.starts_with('#')) { // hex
125 str.remove_prefix(1);
126 return StringOp::stringToBase<16, T>(str);
127 }
128 if (str.starts_with('%')) { // bin
129 str.remove_prefix(1);
130 return StringOp::stringToBase<2, T>(str);
131 }
132 // this recognizes the prefixes "0x" or "0X" (for hexadecimal)
133 // and "0b" or "0B" (for binary)
134 // no prefix in interpreted as decimal
135 // "0" as a prefix for octal is intentionally NOT supported
136 return StringOp::stringTo<T>(str);
137}
138
139// explicitly instantiate for uint16_t and uint32_t (needed for unittest)
140template std::optional<uint16_t> SymbolManager::parseValue<uint16_t>(std::string_view);
141template std::optional<uint32_t> SymbolManager::parseValue<uint32_t>(std::string_view);
142
143[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabel(std::string_view label, uint32_t value)
144{
145 if (label.ends_with(':')) label.remove_suffix(1);
146 if (label.empty()) return {};
147
148 auto tmp{value > 0xFFFF ? std::optional<uint16_t>(static_cast<uint16_t>(value >> 16)) : std::nullopt};
149 return Symbol{std::string(label), static_cast<uint16_t>(value), {}, tmp};
150}
151
152[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabelAndValue(std::string_view label, std::string_view value)
153{
154 if (auto num = parseValue<uint16_t>(value)) {
155 return checkLabel(label, *num);
156 }
157 return {};
158}
159
160[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabelSegmentAndValue(std::string_view label, std::string_view value)
161{
162 if (auto num = parseValue<uint32_t>(value)) {
163 return checkLabel(label, *num);
164 }
165 return {};
166}
167
// Load a symbol file made of "<label> equ <value>" (or "%equ") lines.
// Splitting the buffer into tokenized lines is delegated to loadLines(); the
// lambda below decides, per line, whether the tokens describe a symbol.
168[[nodiscard]] SymbolFile SymbolManager::loadGeneric(std::string_view filename, std::string_view buffer)
169{
170 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
 // Only lines consisting of exactly 3 tokens are considered.
171 if (tokens.size() != 3) return {};
172 auto label = tokens[0];
173 auto equ = tokens[1];
174 auto value = tokens[2];
 // NOTE(review): 'cmp' is declared on a line that is not visible here;
 // presumably a StringOp::casecmp like the one in loadNoICE() -- confirm.
176 if (!cmp(equ, "equ") && // TNIASM0, PASMO, SJASM, ...
177 !cmp(equ, "%equ")) return {}; // TNIASM1
 // The value is parsed as a 16-bit number by checkLabelAndValue().
178 return checkLabelAndValue(label, value);
179 };
180 return loadLines(filename, buffer, SymbolFile::Type::GENERIC, parseLine);
181}
182
183[[nodiscard]] SymbolFile SymbolManager::loadNoICE(std::string_view filename, std::string_view buffer)
184{
185 bool anySegment = false;
186 auto parseLine = [&](std::span<std::string_view> tokens) -> std::optional<Symbol> {
187 if (tokens.size() != 3) return {};
188 auto def = tokens[0];
189 auto label = tokens[1];
190 auto value = tokens[2];
191 if (StringOp::casecmp cmp; !cmp(def, "def")) return {};
192 // detecting segment information above 16bits
193 auto symbol = checkLabelSegmentAndValue(label, value);
194 anySegment |= symbol->segment.has_value();
195 return symbol;
196 };
197 auto file = loadLines(filename, buffer, SymbolFile::Type::NOICE, parseLine);
198 // Heuristic: if all segments in the symbol file are 0,
199 // then assume the file contains no segment information.
200 if (anySegment) {
201 for (auto& symbol: file.getSymbols()) {
202 if (!symbol.segment) symbol.segment = 0;
203 }
204 }
205 return file;
206}
207
208[[nodiscard]] SymbolFile SymbolManager::loadHTC(std::string_view filename, std::string_view buffer)
209{
210 // TODO check with real HTC file
211 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
212 if (tokens.size() != 3) return {};
213 auto label = tokens[0];
214 auto value = tokens[1];
215 // tokens[2] ???
216
217 auto val = StringOp::stringToBase<16, uint16_t>(value);
218 if (!val) return {};
219 return checkLabel(label, *val);
220 };
221 return loadLines(filename, buffer, SymbolFile::Type::HTC, parseLine);
222}
223
// Load a vasm listing: everything up to and including the line that starts
// with "Symbols by value:" is skipped, after that each line made of exactly
// two tokens is read as "<hex-value> <label>".
224[[nodiscard]] SymbolFile SymbolManager::loadVASM(std::string_view filename, std::string_view buffer)
225{
226 SymbolFile result;
227 result.filename = filename;
229
230 static constexpr std::string_view whitespace = " \t\r";
 // True until the "Symbols by value:" header line has been seen.
231 bool skipLines = true;
232 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
233 if (skipLines) {
234 if (line.starts_with("Symbols by value:")) {
235 skipLines = false;
236 }
 // The header line itself is skipped as well.
237 continue;
238 }
239
 // NOTE(review): the start of the 'tokens' declaration is not visible here.
 // The visible part splits the line on whitespace, drops empty parts and
 // takes at most 2 + 1 tokens, so a line with extra tokens can be rejected.
241 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 2 + 1)};
242 if (tokens.size() != 2) continue;
243 auto value = tokens[0];
244 auto label = tokens[1];
245
 // The value is always parsed as hexadecimal (no prefix/suffix handling).
246 if (auto val = StringOp::stringToBase<16, uint16_t>(value)) {
247 if (auto symbol = checkLabel(label, *val)) {
248 result.symbols.push_back(std::move(*symbol));
249 }
250 }
251 }
252
253 return result;
254}
255
// Load an asMSX symbol file. Lines starting with ';' are comments, two of
// which act as section markers: "; global and local" switches symbol parsing
// on, "; other" switches it off again. Inside the symbol section each line
// made of exactly two tokens is read as "<value> <label>".
256[[nodiscard]] SymbolFile SymbolManager::loadASMSX(std::string_view filename, std::string_view buffer)
257{
258 SymbolFile result;
259 result.filename = filename;
261
262 static constexpr std::string_view whitespace = " \t\r";
 // True while we are inside the "global and local" symbol section.
263 bool symbolPart = false;
264 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
265 if (line.starts_with(';')) {
266 if (line.starts_with("; global and local")) {
267 symbolPart = true;
268 } else if (line.starts_with("; other")) {
269 symbolPart = false;
270 }
 // Comment lines never define a symbol themselves.
271 continue;
272 }
273 if (!symbolPart) continue;
274
275 // Possible formats are: (checked in: https://github.com/Fubukimaru/asMSX/blob/master/src/dura.y#L3987)
276 // <abcd>h <name> with <abcd> a 4-digit hex value
277 // <xy>h:<abcd>h <name> <xy> a 2-digit hex indicating the MegaRom Page (ignored)
278 // <name> the symbol name
 // NOTE(review): the start of the 'tokens' declaration is not visible here.
 // The visible part splits the line on whitespace, drops empty parts and
 // takes at most 2 + 1 tokens.
280 view::take(StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace), 2 + 1)};
281 if (tokens.size() != 2) continue;
282 auto value = tokens[0];
283 auto label = tokens[1];
284
 // Drop an optional "<page>:" prefix: keep the part after the first ':'
 // when it is non-empty, otherwise keep the part before it.
285 auto [f, l] = StringOp::splitOnFirst(value, ':');
286 value = l.empty() ? f : l;
287
 // checkLabelAndValue() parses the value via parseValue(), which handles
 // the trailing 'h'.
288 if (auto symbol = checkLabelAndValue(label, value)) {
289 result.symbols.push_back(std::move(*symbol));
290 }
291 }
292
293 return result;
294}
295
296[[nodiscard]] std::optional<unsigned> SymbolManager::isHexDigit(char c)
297{
298 if ('0' <= c && c <= '9') return c - '0';
299 if ('A' <= c && c <= 'F') return c - 'A' + 10;
300 if ('a' <= c && c <= 'f') return c - 'a' + 10;
301 return {};
302}
303[[nodiscard]] std::optional<uint16_t> SymbolManager::is4DigitHex(std::string_view s)
304{
305 if (s.size() != 4) return {};
306 unsigned value = 0;
307 for (int i = 0; i < 4; ++i) {
308 auto digit = isHexDigit(s[i]);
309 if (!digit) return {};
310 value = (value << 4) | *digit;
311 }
312 return narrow<uint16_t>(value);
313}
314
// Load the symbol table from a Hi-Tech C link map. Everything up to and
// including the first line containing "Symbol Table" is skipped. In the
// remaining lines symbols are recognized with a heuristic: a name followed,
// one or two tokens later, by a token of exactly 4 hex digits.
315[[nodiscard]] SymbolFile SymbolManager::loadLinkMap(std::string_view filename, std::string_view buffer)
316{
317 // Hi-Tech C link map file. Here's an example of such a file:
318 // https://github.com/artrag/C-experiments-for-msx/blob/master/START.MAP
319 SymbolFile result;
320 result.filename = filename;
322
323 static constexpr std::string_view whitespace = " \t\r";
 // False until the "Symbol Table" header line has been seen.
324 bool symbolPart = false;
325 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
326 if (!symbolPart) {
327 if (line.find("Symbol Table") != std::string_view::npos) { // c++23 contains()
328 symbolPart = true;
329 }
 // The header line itself is skipped as well.
330 continue;
331 }
332 // Here's an example of a few lines:
333 // asllmod text 2CE7 asllsub text 0AEE
334 // cret text 2E58 csv text 2E4C
335 // float_or_long_used (abs) 0001 indir text 2E5F
336 // Note:
337 // * Multiple (2 in this case) symbols are defined in a single line.
338 // * The width of the columns seems to be the same within a single file, but not across files (?)
339 // * Looking at a single symbol:
340 // * There are 3 columns: name, psect, value
341 // * BUT the psect column can be empty!!!
342 // This in combination with an unknown column-width makes parsing difficult.
343 // The heuristic we use is that the last column must match: [0-9A-Fa-f]{4}
344 auto tokens = StringOp::split_view<StringOp::EmptyParts::REMOVE>(line, whitespace);
345 auto it = tokens.begin();
346 auto et = tokens.end();
 // Consume the tokens of this line symbol by symbol.
347 while (it != et) {
348 auto label = *it++;
349
 // A name without any following token: give up on the rest of the line.
350 if (it == et) break;
351 auto value = *it++; // this could either be the psect or the value column
352 if (auto val = is4DigitHex(value)) {
 // psect column was empty: (name, value)
353 result.symbols.emplace_back(std::string(label), *val, std::nullopt, std::nullopt);
354 continue;
355 }
356
357 if (it == et) break;
358 value = *it++; // try again with 3rd column
359 auto val = is4DigitHex(value);
360 if (!val) break; // if this also doesn't work there's something wrong, skip this line
 // (name, psect, value): the psect token is not stored.
361 result.symbols.emplace_back(std::string(label), *val, std::nullopt, std::nullopt);
362 }
363 }
364
365 return result;
366}
367
368[[nodiscard]] SymbolFile SymbolManager::loadSymbolFile(const std::string& filename, SymbolFile::Type type, std::optional<uint8_t> slot)
369{
370 File file(filename);
371 auto buf = file.mmap();
372 std::string_view buffer(std::bit_cast<const char*>(buf.data()), buf.size());
373
374 using enum SymbolFile::Type;
375 if (type == AUTO_DETECT) {
376 type = detectType(filename, buffer);
377 }
378 assert(type != AUTO_DETECT);
379
380 auto symbolFile = [&]{
381 switch (type) {
382 case ASMSX:
383 return loadASMSX(filename, buffer);
384 case GENERIC:
385 return loadGeneric(filename, buffer);
386 case HTC:
387 return loadHTC(filename, buffer);
388 case LINKMAP:
389 return loadLinkMap(filename, buffer);
390 case NOICE:
391 return loadNoICE(filename, buffer);
392 case VASM:
393 return loadVASM(filename, buffer);
394 default: UNREACHABLE;
395 }
396 }();
397
398 // Update slot info for the file and each of its symbol
399 symbolFile.slot = slot;
400 for (auto& symbol: symbolFile.getSymbols()) {
401 symbol.slot = slot;
402 }
403
404 return symbolFile;
405}
406
407void SymbolManager::refresh()
408{
409 // Drop caches
410 lookupValueCache.clear();
411
412 // Allow to access symbol-values in Tcl expression with syntax: $sym(JIFFY)
413 auto& interp = commandController.getInterpreter();
414 TclObject arrayName("sym");
415 interp.unsetVariable(arrayName.getString().c_str());
416 for (const auto& file : files) {
417 for (const auto& sym : file.symbols) {
418 interp.setVariable(arrayName, TclObject(sym.name), TclObject(sym.value));
419 }
420 }
421
422 if (observer) observer->notifySymbolsChanged();
423}
424
425bool SymbolManager::reloadFile(const std::string& filename, LoadEmpty loadEmpty, SymbolFile::Type type, std::optional<uint8_t> slot)
426{
427 auto file = loadSymbolFile(filename, type, slot); // might throw
428 if (file.symbols.empty() && loadEmpty == LoadEmpty::NOT_ALLOWED) return false;
429
430 if (auto it = ranges::find(files, filename, &SymbolFile::filename);
431 it == files.end()) {
432 files.push_back(std::move(file));
433 } else {
434 *it = std::move(file);
435 }
436 refresh();
437 return true;
438}
439
440void SymbolManager::removeFile(std::string_view filename)
441{
442 auto it = ranges::find(files, filename, &SymbolFile::filename);
443 if (it == files.end()) return; // not found
444 files.erase(it);
445 refresh();
446}
447
449{
450 files.clear();
451 refresh();
452}
453
454std::optional<uint16_t> SymbolManager::parseSymbolOrValue(std::string_view str) const
455{
456 // linear search is fine: only used interactively
457 // prefer an exact match
458 for (const auto& file : files) {
459 if (auto it = ranges::find(file.symbols, str, &Symbol::name);
460 it != file.symbols.end()) {
461 return it->value;
462 }
463 }
464 // but if not found, a case-insensitive match is fine as well
465 for (const auto& file : files) {
466 if (auto it = ranges::find_if(file.symbols, [&](const auto& sym) {
467 return StringOp::casecmp{}(str, sym.name); });
468 it != file.symbols.end()) {
469 return it->value;
470 }
471 }
472 // also not found, then try to parse as a numerical value
473 return parseValue<uint16_t>(str);
474}
475
476std::span<Symbol const * const> SymbolManager::lookupValue(uint16_t value)
477{
478 if (lookupValueCache.empty()) {
479 for (const auto& file : files) {
480 for (const auto& sym : file.symbols) {
481 auto [it, inserted] = lookupValueCache.try_emplace(sym.value, std::vector<const Symbol*>{});
482 it->second.push_back(&sym);
483 }
484 }
485 }
486 if (auto* sym = lookup(lookupValueCache, value)) {
487 return *sym;
488 }
489 return {};
490}
491
492SymbolFile* SymbolManager::findFile(std::string_view filename)
493{
494 if (auto it = ranges::find(files, filename, &SymbolFile::filename); it == files.end()) {
495 return nullptr;
496 } else {
497 return &(*it);
498 }
499}
500
501std::string SymbolManager::getFileFilters()
502{
503 return "Auto-detect file type (*){.*},"
504 "asMSX 0.x symbol files (*.sym){.sym},"
505 "HiTech C link map files (*.map){.map},"
506 "HiTech C symbol files (*.sym){.sym},"
507 "NoICE command files (*.noi){.noi},"
508 "pasmo symbol files (*.symbol *.publics *.sys){.symbol,.publics,.sys},"
509 "tniASM 0.x symbol files (*.sym){.sym},"
510 "tniASM 1.x symbol files (*.sym){.sym},"
511 "vasm symbol files (*.sym){.sym}";
512}
513
514SymbolFile::Type SymbolManager::getTypeForFilter(std::string_view filter)
515{
516 using enum SymbolFile::Type;
517 if (filter.starts_with("Auto")) {
518 return AUTO_DETECT;
519 } else if (filter.starts_with("asMSX")) {
520 return ASMSX;
521 } else if (filter.starts_with("HiTechC link")) {
522 return LINKMAP;
523 } else if (filter.starts_with("HiTechC symbol")) {
524 return HTC;
525 } else if (filter.starts_with("NoICE")) {
526 return NOICE;
527 } else if (filter.starts_with("vasm")) {
528 return VASM;
529 } else {
530 return GENERIC;
531 }
532}
533
534} // namespace openmsx
virtual Interpreter & getInterpreter()=0
std::span< const uint8_t > mmap()
Map file in memory.
Definition File.cc:102
static std::optional< unsigned > isHexDigit(char c)
static SymbolFile loadLines(std::string_view filename, std::string_view buffer, SymbolFile::Type type, function_ref< std::optional< Symbol >(std::span< std::string_view >)> lineParser)
static std::optional< Symbol > checkLabelSegmentAndValue(std::string_view label, std::string_view value)
static SymbolFile::Type detectType(std::string_view filename, std::string_view buffer)
static SymbolFile loadNoICE(std::string_view filename, std::string_view buffer)
static SymbolFile loadHTC(std::string_view filename, std::string_view buffer)
static SymbolFile loadGeneric(std::string_view filename, std::string_view buffer)
static SymbolFile loadSymbolFile(const std::string &filename, SymbolFile::Type type, std::optional< uint8_t > slot={})
static SymbolFile loadVASM(std::string_view filename, std::string_view buffer)
static std::optional< uint16_t > is4DigitHex(std::string_view s)
void removeFile(std::string_view filename)
static std::optional< Symbol > checkLabel(std::string_view label, uint32_t value)
bool reloadFile(const std::string &filename, LoadEmpty loadEmpty, SymbolFile::Type type, std::optional< uint8_t > slot={})
static SymbolFile loadASMSX(std::string_view filename, std::string_view buffer)
std::optional< uint16_t > parseSymbolOrValue(std::string_view s) const
static SymbolFile loadLinkMap(std::string_view filename, std::string_view buffer)
static std::optional< T > parseValue(std::string_view str)
SymbolManager(CommandController &commandController)
static std::optional< Symbol > checkLabelAndValue(std::string_view label, std::string_view value)
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition hash_map.hh:118
std::pair< string_view, string_view > splitOnFirst(string_view str, string_view chars)
Definition StringOp.cc:95
std::string toLower(std::string_view str)
Definition StringOp.cc:26
bool containsCaseInsensitive(std::string_view haystack, std::string_view needle)
Definition StringOp.hh:181
auto split_view(std::string_view str, Separators separators)
Definition StringOp.hh:83
This file implements 3 utility functions:
Definition Autofire.cc:11
auto find_if(InputRange &&range, UnaryPredicate pred)
Definition ranges.hh:175
auto find(InputRange &&range, const T &value)
Definition ranges.hh:162
constexpr auto take(ForwardRange &&range, size_t n)
Definition view.hh:544
constexpr from_range_t from_range
static std::optional< Type > parseType(std::string_view str)
static zstring_view toString(Type type)
std::vector< Symbol > symbols
virtual void notifySymbolsChanged()=0
std::string name
#define UNREACHABLE