openMSX
TclParser.cc
Go to the documentation of this file.
1#include "TclParser.hh"
2#include "ScopedAssign.hh"
3#include "narrow.hh"
4#include "one_of.hh"
5#include "ranges.hh"
6#include "strCat.hh"
7#include <algorithm>
8#include <iostream>
9#include <cassert>
10
11#if DEBUG_TCLPARSER
12void TclParser::DEBUG_PRINT(const std::string& s)
13{
14 std::cout << std::string(2 * level, ' ') << s << '\n';
15}
16
17static constexpr std::string_view type2string(int type)
18{
19 switch (type) {
20 case TCL_TOKEN_WORD:
21 return "word";
22 case TCL_TOKEN_SIMPLE_WORD:
23 return "simple word";
24 case TCL_TOKEN_EXPAND_WORD:
25 return "expand word";
26 case TCL_TOKEN_TEXT:
27 return "text";
28 case TCL_TOKEN_BS:
29 return "bs";
30 case TCL_TOKEN_COMMAND:
31 return "command";
32 case TCL_TOKEN_VARIABLE:
33 return "variable";
34 case TCL_TOKEN_SUB_EXPR:
35 return "sub expr";
36 case TCL_TOKEN_OPERATOR:
37 return "operator";
38 default:
39 assert(false);
40 return {};
41 }
42}
43#endif
44
/** Is 'c' inside the closed interval ['low', 'high']?
  * Uses a single unsigned comparison: when 'c' is below 'low' the
  * subtraction wraps around to a large unsigned value, so one test covers
  * both bounds. Requires 'low' <= 'high'.
  */
static constexpr bool inRange(char c, char low, char high)
{
	unsigned t = c - low;
	return t <= unsigned(high - low);
}

/** Does 'str' look like an integer literal?
  *
  * Accepted syntax: an optional sign ('+' or '-'), followed by either
  *  - one or more decimal digits, or
  *  - a "0x" / "0X" prefix and one or more hexadecimal digits.
  *
  * At least one digit is required: an empty string, a lone sign or a bare
  * "0x" prefix is NOT a number (so it won't get colored as a literal).
  */
static bool isNumber(std::string_view str)
{
	// optional sign
	if (!str.empty() && ((str.front() == '-') || (str.front() == '+'))) {
		str.remove_prefix(1);
	}

	// optional hexadecimal prefix
	const bool hex = (str.size() >= 2) && (str[0] == '0') &&
	                 ((str[1] == 'x') || (str[1] == 'X'));
	if (hex) {
		str.remove_prefix(2);
	}

	// all_of() is trivially true for an empty range, so explicitly
	// reject the case where no digits remain
	if (str.empty()) return false;

	if (hex) {
		return std::all_of(str.begin(), str.end(), [](char c) {
			return inRange(c, '0', '9') ||
			       inRange(c, 'a', 'f') ||
			       inRange(c, 'A', 'F');
		});
	}
	return std::all_of(str.begin(), str.end(),
		[](char c) { return inRange(c, '0', '9'); });
}
68
69
70TclParser::TclParser(Tcl_Interp* interp_, std::string_view input)
71 : interp(interp_)
72 , colors(input.size(), '.')
73 , parseStr(input)
74{
75 parse(parseStr.data(), narrow<int>(parseStr.size()), COMMAND);
76}
77
78void TclParser::parse(const char* p, int size, ParseType type)
79{
80 ScopedAssign sa1(offset, offset + narrow<int>(p - parseStr.data()));
81 ScopedAssign sa2(parseStr, std::string(p, size));
82 last.push_back(offset);
83
84 // The functions Tcl_ParseCommand() and Tcl_ParseExpr() are meant to
85 // operate on a complete command. For interactive syntax highlighting
86 // we also want to pass incomplete commands (e.g. with an opening, but
87 // not yet a closing brace). This loop tries to parse and depending on
88 // the parse error retries with a completed command.
89 Tcl_Parse parseInfo;
90 int retryCount = 0;
91 while (true) {
92 int parseStatus = (type == EXPRESSION)
93 ? Tcl_ParseExpr(interp, parseStr.data(), int(parseStr.size()), &parseInfo)
94 : Tcl_ParseCommand(interp, parseStr.data(), int(parseStr.size()), 1, &parseInfo);
95 if (parseStatus == TCL_OK) break;
96 Tcl_FreeParse(&parseInfo);
97 ++retryCount;
98
99 bool allowComplete = ((offset + parseStr.size()) >= colors.size()) &&
100 (retryCount < 10);
101 Tcl_Obj* resObj = Tcl_GetObjResult(interp);
102 int resLen;
103 const char* resStr = Tcl_GetStringFromObj(resObj, &resLen);
104 std::string_view error(resStr, resLen);
105
106 if (allowComplete && error.starts_with("missing close-brace")) {
107 parseStr += '}';
108 } else if (allowComplete && error.starts_with("missing close-bracket")) {
109 parseStr += ']';
110 } else if (allowComplete && error.starts_with("missing \"")) {
111 parseStr += '"';
112 } else if (allowComplete && error.starts_with("unbalanced open paren")) {
113 parseStr += ')';
114 } else if (allowComplete && error.starts_with("missing operand")) {
115 // This also triggers for a (wrong) expression like
116 // 'if { / 3'
117 // and that can't be solved by adding something at the
118 // end. Without the retryCount stuff we would get in an
119 // infinite loop here.
120 parseStr += '0';
121 } else if (allowComplete && error.starts_with("missing )")) {
122 parseStr += ')';
123 } else {
124 DEBUG_PRINT("ERROR: " + parseStr + ": " + error);
125 setColors(parseStr.data(), int(parseStr.size()), 'E');
126 if ((offset + size) < int(colors.size())) last.pop_back();
127 return;
128 }
129 }
130
131 if (type == EXPRESSION) {
132 DEBUG_PRINT("EXPRESSION: " + parseStr);
133 } else {
134 if (parseInfo.commentSize) {
135 DEBUG_PRINT("COMMENT: " + std::string_view(parseInfo.commentStart, parseInfo.commentSize));
136 setColors(parseInfo.commentStart, parseInfo.commentSize, 'c');
137 }
138 DEBUG_PRINT("COMMAND: " + std::string_view(parseInfo.commandStart, parseInfo.commandSize));
139 }
140 printTokens({parseInfo.tokenPtr, size_t(parseInfo.numTokens)});
141
142 // If the current sub-command stops before the end of the original
143 // full command, then it's not the last sub-command. Note that
144 // sub-commands can be nested.
145 if ((offset + size) < int(colors.size())) last.pop_back();
146
147 const char* nextStart = parseInfo.commandStart + parseInfo.commandSize;
148 Tcl_FreeParse(&parseInfo);
149
150 if (type == COMMAND) {
151 // next command
152 auto nextSize = int((parseStr.data() + parseStr.size()) - nextStart);
153 if (nextSize > 0) {
154 parse(nextStart, nextSize, type);
155 }
156 }
157}
158
159void TclParser::printTokens(std::span<const Tcl_Token> tokens)
160{
161#if DEBUG_TCLPARSER
162 ScopedAssign sa(level, level + 1);
163#endif
164 for (size_t i = 0; i < tokens.size(); ) {
165 const Tcl_Token& token = tokens[i];
166 std::string_view tokenStr(token.start, token.size);
167 DEBUG_PRINT(type2string(token.type) + " -> " + tokenStr);
168 switch (token.type) {
169 case TCL_TOKEN_VARIABLE:
170 assert(token.numComponents >= 1);
171 setColors(tokens[i + 1].start - 1, tokens[i + 1].size + 1, 'v');
172 break;
173 case TCL_TOKEN_WORD:
174 case TCL_TOKEN_SIMPLE_WORD:
175 if (*token.start == '"') {
176 setColors(token.start, token.size, 'l');
177 }
178 if ((i == 0) && isProc(interp, tokenStr)) {
179 setColors(token.start, token.size, 'p');
180 }
181 break;
182 case TCL_TOKEN_EXPAND_WORD:
183 setColors(token.start, 3, 'o');
184 break;
185 case TCL_TOKEN_OPERATOR:
186 case TCL_TOKEN_BS:
187 setColors(token.start, token.size, 'o');
188 break;
189 case TCL_TOKEN_TEXT:
190 if (isNumber(tokenStr) || (*token.start == '"')) {
191 // TODO only works if the same as 'l'
192 setColors(token.start, token.size, 'l');
193 }
194 break;
195 }
196 if (token.type == TCL_TOKEN_COMMAND) {
197 parse(token.start + 1, token.size - 2, COMMAND);
198 } else if (token.type == TCL_TOKEN_SIMPLE_WORD) {
199 ParseType subType = guessSubType(tokens, i);
200 if (subType != OTHER) {
201 parse(tokens[i + 1].start, tokens[i + 1].size, subType);
202 }
203 }
204 printTokens(tokens.subspan(++i, token.numComponents));
205 i += token.numComponents;
206 }
207}
208
209TclParser::ParseType TclParser::guessSubType(std::span<const Tcl_Token> tokens, size_t i)
210{
211 // heuristic: if previous token is 'if' then assume this is an expression
212 if ((i >= 1) && (tokens[i - 1].type == TCL_TOKEN_TEXT)) {
213 std::string_view prevText(tokens[i - 1].start, tokens[i - 1].size);
214 if (prevText == one_of("if", "elseif", "expr")) {
215 return EXPRESSION;
216 }
217 }
218
219 // heuristic: parse text that starts with { as a subcommand
220 if (*tokens[i].start == '{') {
221 return COMMAND;
222 }
223
224 // a plain text element
225 return OTHER;
226}
227
228bool TclParser::isProc(Tcl_Interp* interp, std::string_view str)
229{
230 auto command = tmpStrCat("openmsx::is_command_name {", str, '}');
231 if (Tcl_Eval(interp, command.c_str()) != TCL_OK) return false;
232 int result;
233 if (Tcl_GetBooleanFromObj(interp, Tcl_GetObjResult(interp), &result)
234 != TCL_OK) return false;
235 return result != 0;
236}
237
238void TclParser::setColors(const char* p, int size, char c)
239{
240 int start = narrow<int>(p - parseStr.data()) + offset;
241 int stop = std::min(start + size, int(colors.size()));
242 for (auto i : xrange(start, stop)) {
243 colors[i] = c;
244 }
245}
TclObject t
#define DEBUG_PRINT(x)
Definition TclParser.hh:58
Assign new value to some variable and restore the original value when this object goes out of scope.
TclParser(Tcl_Interp *interp, std::string_view input)
Input: Tcl interpreter and command to parse
Definition TclParser.cc:70
static bool isProc(Tcl_Interp *interp, std::string_view str)
Is the given string a valid Tcl command.
Definition TclParser.cc:228
constexpr bool all_of(InputRange &&range, UnaryPredicate pred)
Definition ranges.hh:188
TemporaryString tmpStrCat(Ts &&... ts)
Definition strCat.hh:742
constexpr auto xrange(T e)
Definition xrange.hh:132