openMSX
SymbolManager.cc
Go to the documentation of this file.
1#include "SymbolManager.hh"
2
4#include "File.hh"
5#include "Interpreter.hh"
6#include "TclObject.hh"
7
8#include "narrow.hh"
9#include "ranges.hh"
10#include "static_vector.hh"
11#include "stl.hh"
12#include "StringOp.hh"
13#include "unreachable.hh"
14#include "view.hh"
15
16#include <bit>
17#include <cassert>
18#include <fstream>
19
20namespace openmsx {
21
23{
24 switch (type) {
25 case Type::AUTO_DETECT: return "auto-detect";
26 case Type::ASMSX: return "asMSX";
27 case Type::GENERIC: return "generic";
28 case Type::HTC: return "htc";
29 case Type::LINKMAP: return "linkmap";
30 case Type::NOICE: return "NoICE";
31 case Type::VASM: return "vasm";
32 default: UNREACHABLE;
33 }
34}
35
36std::optional<SymbolFile::Type> SymbolFile::parseType(std::string_view str)
37{
38 if (str == "auto-detect") return Type::AUTO_DETECT;
39 if (str == "asMSX") return Type::ASMSX;
40 if (str == "generic") return Type::GENERIC;
41 if (str == "htc") return Type::HTC;
42 if (str == "linkmap") return Type::LINKMAP;
43 if (str == "NoICE") return Type::NOICE;
44 if (str == "vasm") return Type::VASM;
45 return {};
46}
47
48
50 : commandController(commandController_)
51{
52}
53
54// detection logic taken from old openmsx-debugger, could probably be improved.
// Pick a symbol-file type for 'filename'.
// The decision is based on the lower-cased file name suffix; for ".sym" files
// the first line of 'buffer' (up to the first '\n' or '\r') is inspected as well.
// NOTE(review): the embedded listing numbers skip 61, 64, 69, 71, 73, 75, 79
// and 89, so the statements inside most branches (presumably the 'return's)
// and the final fallback are not visible in this text -- restore them from
// the real source before relying on this listing.
55[[nodiscard]] SymbolFile::Type SymbolManager::detectType(const std::string& filename, std::string_view buffer)
56{
// compare extensions case-insensitively
57 auto fname = StringOp::toLower(filename);
58
59 if (fname.ends_with(".noi")) {
60 // NoICE command file
62 } else if (fname.ends_with(".map")) {
63 // HiTech link map file
65 } else if (fname.ends_with(".sym")) {
66 // auto detect which sym file
// only the first line of the file is used for the content-based checks below
67 auto [line, _] = StringOp::splitOnFirst(buffer, "\n\r");
68 if (line.starts_with("; Symbol table")) {
70 } else if (StringOp::containsCaseInsensitive(line, " %equ ")) { // TNIASM1
72 } else if (StringOp::containsCaseInsensitive(line, " equ ")) {
74 } else if (StringOp::containsCaseInsensitive(line, "Sections:")) {
76 } else {
77 // this is a blunt conclusion but I don't know a way
78 // to detect this file type
80 }
81 } else if (fname.ends_with(".symbol") || fname.ends_with(".publics") || fname.ends_with(".sys")) {
82 /* They are the same type of file. For some reason the Debian
83 * manpage uses the extension ".sys"
84 * pasmo doc -> pasmo [options] file.asm file.bin [file.symbol [file.publics] ]
85 * pasmo manpage in Debian -> pasmo [options] file.asm file.bin [file.sys]
86 */
87 return SymbolFile::Type::GENERIC; // pasmo
88 }
90}
91
93 const std::string& filename, std::string_view buffer, SymbolFile::Type type,
94 function_ref<std::optional<Symbol>(std::span<std::string_view>)> lineParser)
95{
96 SymbolFile result;
97 result.filename = filename;
98 result.type = type;
99
100 static constexpr std::string_view whitespace = " \t\r";
101 for (std::string_view fullLine : StringOp::split_view(buffer, '\n')) {
102 auto [line, _] = StringOp::splitOnFirst(fullLine, ';');
103
105 view::take(StringOp::split_view<StringOp::REMOVE_EMPTY_PARTS>(line, whitespace), 3 + 1)};
106 if (auto symbol = lineParser(tokens)) {
107 result.symbols.push_back(std::move(*symbol));
108 }
109 }
110
111 return result;
112}
113
114[[nodiscard]] std::optional<uint16_t> SymbolManager::parseValue(std::string_view str)
115{
116 if (str.ends_with('h') || str.ends_with('H')) { // hex
117 str.remove_suffix(1);
118 return StringOp::stringToBase<16, uint16_t>(str);
119 }
120 if (str.starts_with('$') || str.starts_with('#')) { // hex
121 str.remove_prefix(1);
122 return StringOp::stringToBase<16, uint16_t>(str);
123 }
124 if (str.starts_with('%')) { // bin
125 str.remove_prefix(1);
126 return StringOp::stringToBase<2, uint16_t>(str);
127 }
128 // this recognizes the prefixes "0x" or "0X" (for hexadecimal)
129 // and "0b" or "0B" (for binary)
130 // no prefix in interpreted as decimal
131 // "0" as a prefix for octal is intentionally NOT supported
132 return StringOp::stringTo<uint16_t>(str);
133}
134
135[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabel(std::string_view label, uint16_t value)
136{
137 if (label.ends_with(':')) label.remove_suffix(1);
138 if (label.empty()) return {};
139
140 return Symbol{std::string(label), value};
141}
142
143[[nodiscard]] std::optional<Symbol> SymbolManager::checkLabelAndValue(std::string_view label, std::string_view value)
144{
145 if (auto num = parseValue(value)) {
146 return checkLabel(label, *num);
147 }
148 return {};
149}
150
// Load a "generic" symbol file: every accepted line has exactly three tokens,
//    <label> equ <value>      or      <label> %equ <value>
// Lines are tokenized by loadLines(); the lambda below decides per line.
// NOTE(review): 'cmp' is not declared in the visible lines (listing number 158
// is absent); presumably it is a case-insensitive string comparator -- confirm
// against the real source.
151[[nodiscard]] SymbolFile SymbolManager::loadGeneric(const std::string& filename, std::string_view buffer)
152{
153 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
// anything that is not exactly "label keyword value" is ignored
154 if (tokens.size() != 3) return {};
155 auto label = tokens[0];
156 auto equ = tokens[1];
157 auto value = tokens[2];
159 if (!cmp(equ, "equ") && // TNIASM0, PASMO, SJASM, ...
160 !cmp(equ, "%equ")) return {}; // TNIASM1
161 return checkLabelAndValue(label, value);
162 };
163 return loadLines(filename, buffer, SymbolFile::Type::GENERIC, parseLine);
164}
165
// Load a NoICE command file: every accepted line has exactly three tokens,
//    def <label> <value>
// Lines are tokenized by loadLines(); the lambda below decides per line.
// NOTE(review): 'cmp' is not declared in the visible lines (listing number 173
// is absent); presumably it is a case-insensitive string comparator -- confirm
// against the real source.
166[[nodiscard]] SymbolFile SymbolManager::loadNoICE(const std::string& filename, std::string_view buffer)
167{
168 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
// anything that is not exactly "keyword label value" is ignored
169 if (tokens.size() != 3) return {};
170 auto def = tokens[0];
171 auto label = tokens[1];
172 auto value = tokens[2];
174 if (!cmp(def, "def")) return {};
175 return checkLabelAndValue(label, value);
176 };
177 return loadLines(filename, buffer, SymbolFile::Type::NOICE, parseLine);
178}
179
180[[nodiscard]] SymbolFile SymbolManager::loadHTC(const std::string& filename, std::string_view buffer)
181{
182 // TODO check with real HTC file
183 auto parseLine = [](std::span<std::string_view> tokens) -> std::optional<Symbol> {
184 if (tokens.size() != 3) return {};
185 auto label = tokens[0];
186 auto value = tokens[1];
187 // tokens[2] ???
188
189 auto val = StringOp::stringToBase<16, uint16_t>(value);
190 if (!val) return {};
191 return checkLabel(label, *val);
192 };
193 return loadLines(filename, buffer, SymbolFile::Type::HTC, parseLine);
194}
195
// Load a vasm symbol file.
// Everything up to and including the line that starts with "Symbols by value:"
// is skipped. After that every line with exactly two tokens is read as
//    <hex-value> <label>
// NOTE(review): listing numbers 200 and 212 are absent from this text. Line 212
// must contain the start of the 'tokens' declaration (only its continuation is
// visible); line 200 presumably sets 'result.type' -- confirm against the real
// source.
196[[nodiscard]] SymbolFile SymbolManager::loadVASM(const std::string& filename, std::string_view buffer)
197{
198 SymbolFile result;
199 result.filename = filename;
201
202 static constexpr std::string_view whitespace = " \t\r";
// true while we are still in front of the "Symbols by value:" header
203 bool skipLines = true;
204 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
205 if (skipLines) {
206 if (line.starts_with("Symbols by value:")) {
207 skipLines = false;
208 }
// the header line itself carries no symbol either
209 continue;
210 }
211
// take at most one token more than needed, so that lines with extra tokens can be rejected
213 view::take(StringOp::split_view<StringOp::REMOVE_EMPTY_PARTS>(line, whitespace), 2 + 1)};
214 if (tokens.size() != 2) continue;
215 auto value = tokens[0];
216 auto label = tokens[1];
217
// the value is a bare hexadecimal number (no prefix/suffix handling)
218 if (auto val = StringOp::stringToBase<16, uint16_t>(value)) {
219 if (auto symbol = checkLabel(label, *val)) {
220 result.symbols.push_back(std::move(*symbol));
221 }
222 }
223 }
224
225 return result;
226}
227
// Load an asMSX symbol file.
// Lines starting with ';' are section markers/comments: a line starting with
// "; global and local" switches symbol parsing on, one starting with "; other"
// switches it off again. Only lines inside the enabled part are parsed.
// NOTE(review): listing numbers 232 and 251 are absent from this text. Line 251
// must contain the start of the 'tokens' declaration (only its continuation is
// visible); line 232 presumably sets 'result.type' -- confirm against the real
// source.
228[[nodiscard]] SymbolFile SymbolManager::loadASMSX(const std::string& filename, std::string_view buffer)
229{
230 SymbolFile result;
231 result.filename = filename;
233
234 static constexpr std::string_view whitespace = " \t\r";
235 bool symbolPart = false;
236 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
237 if (line.starts_with(';')) {
238 if (line.starts_with("; global and local")) {
239 symbolPart = true;
240 } else if (line.starts_with("; other")) {
241 symbolPart = false;
242 }
// a ';' line never defines a symbol itself
243 continue;
244 }
245 if (!symbolPart) continue;
246
247 // Possible formats are: (checked in: https://github.com/Fubukimaru/asMSX/blob/master/src/dura.y#L3987)
248 // <abcd>h <name> with <abcd> a 4-digit hex value
249 // <xy>h:<abcd>h <name> <xy> a 2-digit hex indicating the MegaRom Page (ignored)
250 // <name> the symbol name
252 view::take(StringOp::split_view<StringOp::REMOVE_EMPTY_PARTS>(line, whitespace), 2 + 1)};
253 if (tokens.size() != 2) continue;
254 auto value = tokens[0];
255 auto label = tokens[1];
256
// drop an optional "<page>:" prefix: keep the part after ':' when there is one
257 auto [f, l] = StringOp::splitOnFirst(value, ':');
258 value = l.empty() ? f : l;
259
// parseValue() (via checkLabelAndValue) handles the trailing 'h'
260 if (auto symbol = checkLabelAndValue(label, value)) {
261 result.symbols.push_back(std::move(*symbol));
262 }
263 }
264
265 return result;
266}
267
268[[nodiscard]] std::optional<unsigned> SymbolManager::isHexDigit(char c)
269{
270 if ('0' <= c && c <= '9') return c - '0';
271 if ('A' <= c && c <= 'F') return c - 'A' + 10;
272 if ('a' <= c && c <= 'f') return c - 'a' + 10;
273 return {};
274}
275[[nodiscard]] std::optional<uint16_t> SymbolManager::is4DigitHex(std::string_view s)
276{
277 if (s.size() != 4) return {};
278 unsigned value = 0;
279 for (int i = 0; i < 4; ++i) {
280 auto digit = isHexDigit(s[i]);
281 if (!digit) return {};
282 value = (value << 4) | *digit;
283 }
284 return narrow<uint16_t>(value);
285}
286
// Load the symbol table from a link map file.
// All lines up to and including the first one that contains "Symbol Table" are
// skipped; every later line is scanned for (label, [psect,] 4-digit-hex) groups.
// NOTE(review): listing number 293 is absent from this text; presumably it
// sets 'result.type' -- confirm against the real source.
287[[nodiscard]] SymbolFile SymbolManager::loadLinkMap(const std::string& filename, std::string_view buffer)
288{
289 // Hi-Tech C link map file. Here's an example of such a file:
290 // https://github.com/artrag/C-experiments-for-msx/blob/master/START.MAP
291 SymbolFile result;
292 result.filename = filename;
294
295 static constexpr std::string_view whitespace = " \t\r";
296 bool symbolPart = false;
297 for (std::string_view line : StringOp::split_view(buffer, '\n')) {
298 if (!symbolPart) {
299 if (line.find("Symbol Table") != std::string_view::npos) { // c++23 contains()
300 symbolPart = true;
301 }
// the header line itself is not parsed for symbols
302 continue;
303 }
304 // Here's an example of a few lines:
305 // asllmod text 2CE7 asllsub text 0AEE
306 // cret text 2E58 csv text 2E4C
307 // float_or_long_used (abs) 0001 indir text 2E5F
308 // Note:
309 // * Multiple (2 in this case) symbols are defined in a single line.
310 // * The width of the columns seems to be the same within a single file, but not across files (?)
311 // * Looking at a single symbol:
312 // * There are 3 columns: name, psect, value
313 // * BUT the psect column can be empty!!!
314 // This in combination with an unknown column-width makes parsing difficult.
315 // The heuristic we use is that the last column must match: [0-9A-Fa-f]{4}
316 auto tokens = StringOp::split_view<StringOp::REMOVE_EMPTY_PARTS>(line, whitespace);
317 auto it = tokens.begin();
318 auto et = tokens.end();
// consume the tokens of this line in groups of 2 or 3
319 while (it != et) {
320 auto label = *it++;
321
// a label without any following token is dropped
322 if (it == et) break;
323 auto value = *it++; // this could either be the psect or the value column
324 if (auto val = is4DigitHex(value)) {
// psect column was empty: (label, value)
325 result.symbols.push_back(Symbol{std::string(label), *val});
326 continue;
327 }
328
329 if (it == et) break;
330 value = *it++; // try again with 3rd column
331 auto val = is4DigitHex(value);
332 if (!val) break; // if this also doesn't work there's something wrong, skip this line
333 result.symbols.push_back(Symbol{std::string(label), *val});
334 }
335 }
336
337 return result;
338}
339
// Open 'filename', map it in memory and parse it with the loader that belongs
// to 'type'. When 'type' is AUTO_DETECT the type is first derived via
// detectType().
// NOTE(review): listing numbers 352, 354, 356, 358, 360 and 362 are absent from
// this text, so the 'case' labels of the switch below are not visible; each
// visible 'return' presumably belongs to the label of the matching type --
// confirm against the real source.
340[[nodiscard]] SymbolFile SymbolManager::loadSymbolFile(const std::string& filename, SymbolFile::Type type)
341{
342 File file(filename);
343 auto buf = file.mmap();
// view the mapped bytes as characters (no copy is made)
344 std::string_view buffer(std::bit_cast<const char*>(buf.data()), buf.size());
345
346 if (type == SymbolFile::Type::AUTO_DETECT) {
347 type = detectType(filename, buffer);
348 }
// from here on a concrete type is required
349 assert(type != SymbolFile::Type::AUTO_DETECT);
350
351 switch (type) {
353 return loadASMSX(filename, buffer);
355 return loadGeneric(filename, buffer);
357 return loadHTC(filename, buffer);
359 return loadLinkMap(filename, buffer);
361 return loadNoICE(filename, buffer);
363 return loadVASM(filename, buffer);
364 default: UNREACHABLE;
365 }
366}
367
368void SymbolManager::refresh()
369{
370 // Drop caches
371 lookupValueCache.clear();
372
373 // Allow to access symbol-values in Tcl expression with syntax: $sym(JIFFY)
374 auto& interp = commandController.getInterpreter();
375 TclObject arrayName("sym");
376 interp.unsetVariable(arrayName.getString().c_str());
377 for (const auto& file : files) {
378 for (const auto& sym : file.symbols) {
379 interp.setVariable(arrayName, TclObject(sym.name), TclObject(sym.value));
380 }
381 }
382
383 if (observer) observer->notifySymbolsChanged();
384}
385
386bool SymbolManager::reloadFile(const std::string& filename, LoadEmpty loadEmpty, SymbolFile::Type type)
387{
388 auto file = loadSymbolFile(filename, type); // might throw
389 if (file.symbols.empty() && loadEmpty == LoadEmpty::NOT_ALLOWED) return false;
390
391 auto it = ranges::find(files, filename, &SymbolFile::filename);
392 if (it == files.end()) {
393 files.push_back(std::move(file));
394 } else {
395 *it = std::move(file);
396 }
397 refresh();
398 return true;
399}
400
401void SymbolManager::removeFile(std::string_view filename)
402{
403 auto it = ranges::find(files, filename, &SymbolFile::filename);
404 if (it == files.end()) return; // not found
405 files.erase(it);
406 refresh();
407}
408
410{
411 files.clear();
412 refresh();
413}
414
415std::optional<uint16_t> SymbolManager::parseSymbolOrValue(std::string_view str) const
416{
417 // linear search is fine: only used interactively
418 // prefer an exact match
419 for (const auto& file : files) {
420 if (auto it = ranges::find(file.symbols, str, &Symbol::name);
421 it != file.symbols.end()) {
422 return it->value;
423 }
424 }
425 // but if not found, a case-insensitive match is fine as well
426 for (const auto& file : files) {
427 if (auto it = ranges::find_if(file.symbols, [&](const auto& sym) {
428 return StringOp::casecmp{}(str, sym.name); });
429 it != file.symbols.end()) {
430 return it->value;
431 }
432 }
433 // also not found, then try to parse as a numerical value
434 return parseValue(str);
435}
436
437std::span<Symbol const * const> SymbolManager::lookupValue(uint16_t value)
438{
439 if (lookupValueCache.empty()) {
440 for (const auto& file : files) {
441 for (const auto& sym : file.symbols) {
442 auto [it, inserted] = lookupValueCache.try_emplace(sym.value, std::vector<const Symbol*>{});
443 it->second.push_back(&sym);
444 }
445 }
446 }
447 if (auto* sym = lookup(lookupValueCache, value)) {
448 return *sym;
449 }
450 return {};
451}
452
453std::string SymbolManager::getFileFilters()
454{
455 return "Auto-detect file type (*){.*},"
456 "asMSX 0.x symbol files (*.sym){.sym},"
457 "HiTech C link map files (*.map){.map},"
458 "HiTech C symbol files (*.sym){.sym},"
459 "NoICE command files (*.noi){.noi},"
460 "pasmo symbol files (*.symbol *.publics *.sys){.symbol,.publics,.sys},"
461 "tniASM 0.x symbol files (*.sym){.sym},"
462 "tniASM 1.x symbol files (*.sym){.sym},"
463 "vasm symbol files (*.sym){.sym}";
464}
465
466SymbolFile::Type SymbolManager::getTypeForFilter(std::string_view filter)
467{
468 if (filter.starts_with("Auto")) {
469 return SymbolFile::Type::AUTO_DETECT;
470 } else if (filter.starts_with("asMSX")) {
471 return SymbolFile::Type::ASMSX;
472 } else if (filter.starts_with("HiTechC link")) {
473 return SymbolFile::Type::LINKMAP;
474 } else if (filter.starts_with("HiTechC symbol")) {
475 return SymbolFile::Type::HTC;
476 } else if (filter.starts_with("NoICE")) {
477 return SymbolFile::Type::NOICE;
478 } else if (filter.starts_with("vasm")) {
479 return SymbolFile::Type::VASM;
480 } else {
481 return SymbolFile::Type::GENERIC;
482 }
483}
484
485} // namespace openmsx
virtual Interpreter & getInterpreter()=0
std::span< const uint8_t > mmap()
Map file in memory.
Definition File.cc:102
static std::optional< unsigned > isHexDigit(char c)
bool reloadFile(const std::string &filename, LoadEmpty loadEmpty, SymbolFile::Type type)
static SymbolFile loadNoICE(const std::string &filename, std::string_view buffer)
static SymbolFile loadHTC(const std::string &filename, std::string_view buffer)
static SymbolFile loadSymbolFile(const std::string &filename, SymbolFile::Type type)
static SymbolFile loadASMSX(const std::string &filename, std::string_view buffer)
static std::optional< uint16_t > is4DigitHex(std::string_view s)
void removeFile(std::string_view filename)
static SymbolFile loadVASM(const std::string &filename, std::string_view buffer)
static SymbolFile loadLines(const std::string &filename, std::string_view buffer, SymbolFile::Type type, function_ref< std::optional< Symbol >(std::span< std::string_view >)> lineParser)
static std::optional< Symbol > checkLabel(std::string_view label, uint16_t value)
static std::optional< uint16_t > parseValue(std::string_view str)
static SymbolFile loadLinkMap(const std::string &filename, std::string_view buffer)
std::optional< uint16_t > parseSymbolOrValue(std::string_view s) const
static SymbolFile loadGeneric(const std::string &filename, std::string_view buffer)
SymbolManager(CommandController &commandController)
static std::optional< Symbol > checkLabelAndValue(std::string_view label, std::string_view value)
static SymbolFile::Type detectType(const std::string &filename, std::string_view buffer)
Like std::string_view, but with the extra guarantee that it refers to a zero-terminated string.
const Value * lookup(const hash_map< Key, Value, Hasher, Equal > &map, const Key2 &key)
Definition hash_map.hh:118
std::pair< string_view, string_view > splitOnFirst(string_view str, string_view chars)
Definition StringOp.cc:95
std::string toLower(std::string_view str)
Definition StringOp.cc:26
bool containsCaseInsensitive(std::string_view haystack, std::string_view needle)
Definition StringOp.hh:181
auto split_view(std::string_view str, Separators separators)
Definition StringOp.hh:83
This file implemented 3 utility functions:
Definition Autofire.cc:9
auto find_if(InputRange &&range, UnaryPredicate pred)
Definition ranges.hh:173
auto find(InputRange &&range, const T &value)
Definition ranges.hh:160
constexpr auto take(ForwardRange &&range, size_t n)
Definition view.hh:544
constexpr from_range_t from_range
static std::optional< Type > parseType(std::string_view str)
static zstring_view toString(Type type)
std::vector< Symbol > symbols
virtual void notifySymbolsChanged()=0
std::string name
#define UNREACHABLE