openMSX
TclParser.cc
Go to the documentation of this file.
1 #include "TclParser.hh"
2 #include "ScopedAssign.hh"
3 #include "one_of.hh"
4 #include "ranges.hh"
5 #include "strCat.hh"
6 #include "StringOp.hh"
7 #include <algorithm>
8 #include <iostream>
9 #include <cassert>
10 
11 using std::string;
12 
13 #if DEBUG_TCLPARSER
14 void TclParser::DEBUG_PRINT(const string& s)
15 {
16  std::cout << string(2 * level, ' ') << s << '\n';
17 }
18 
19 static std::string_view type2string(int type)
20 {
21  switch (type) {
22  case TCL_TOKEN_WORD:
23  return "word";
24  case TCL_TOKEN_SIMPLE_WORD:
25  return "simple word";
26  case TCL_TOKEN_EXPAND_WORD:
27  return "expand word";
28  case TCL_TOKEN_TEXT:
29  return "text";
30  case TCL_TOKEN_BS:
31  return "bs";
32  case TCL_TOKEN_COMMAND:
33  return "command";
34  case TCL_TOKEN_VARIABLE:
35  return "variable";
36  case TCL_TOKEN_SUB_EXPR:
37  return "sub expr";
38  case TCL_TOKEN_OPERATOR:
39  return "operator";
40  default:
41  assert(false);
42  return {};
43  }
44 }
45 #endif
46 
// Is 'c' inside the inclusive range [low, high]?
// Implemented with a single unsigned comparison: when 'c' is below 'low'
// the subtraction wraps around to a large unsigned value, which then fails
// the '<=' test. All callers in this file pass 'low <= high'.
static bool inRange(char c, char low, char high)
{
	const unsigned width    = unsigned(high - low);
	const unsigned distance = c - low;
	return distance <= width;
}
52 
53 static bool isNumber(std::string_view str)
54 {
55  if (StringOp::startsWith(str, '-') || StringOp::startsWith(str, '+')) {
56  str.remove_prefix(1);
57  }
58  if (StringOp::startsWith(str, "0x") || StringOp::startsWith(str, "0X")) {
59  str.remove_prefix(2);
60  return ranges::all_of(str, [](char c) {
61  return inRange(c, '0', '9') ||
62  inRange(c, 'a', 'f') ||
63  inRange(c, 'A', 'F');
64  });
65  } else {
66  return ranges::all_of(str,
67  [](char c) { return inRange(c, '0', '9'); });
68  }
69 }
70 
71 
72 TclParser::TclParser(Tcl_Interp* interp_, std::string_view input)
73  : interp(interp_)
74  , colors(input.size(), '.')
75  , parseStr(input)
76  , offset(0)
78  , level(0)
79 #endif
80 {
81  parse(parseStr.data(), int(parseStr.size()), COMMAND);
82 }
83 
84 void TclParser::parse(const char* p, int size, ParseType type)
85 {
86  ScopedAssign<int> sa1(offset, offset + (p - parseStr.data()));
87  ScopedAssign sa2(parseStr, string(p, size));
88  last.push_back(offset);
89 
90  // The functions Tcl_ParseCommand() and Tcl_ParseExpr() are meant to
91  // operate on a complete command. For interactive syntax highlighting
92  // we also want to pass incomplete commands (e.g. with an opening, but
93  // not yet a closing brace). This loop tries to parse and depening on
94  // the parse error retries with a completed command.
95  Tcl_Parse parseInfo;
96  int retryCount = 0;
97  while (true) {
98  int parseStatus = (type == EXPRESSION)
99  ? Tcl_ParseExpr(interp, parseStr.data(), int(parseStr.size()), &parseInfo)
100  : Tcl_ParseCommand(interp, parseStr.data(), int(parseStr.size()), 1, &parseInfo);
101  if (parseStatus == TCL_OK) break;
102  Tcl_FreeParse(&parseInfo);
103  ++retryCount;
104 
105  bool allowComplete = ((offset + parseStr.size()) >= colors.size()) &&
106  (retryCount < 10);
107  Tcl_Obj* resObj = Tcl_GetObjResult(interp);
108  int resLen;
109  const char* resStr = Tcl_GetStringFromObj(resObj, &resLen);
110  std::string_view error(resStr, resLen);
111 
112  if (allowComplete && StringOp::startsWith(error, "missing close-brace")) {
113  parseStr += '}';
114  } else if (allowComplete && StringOp::startsWith(error, "missing close-bracket")) {
115  parseStr += ']';
116  } else if (allowComplete && StringOp::startsWith(error, "missing \"")) {
117  parseStr += '"';
118  } else if (allowComplete && StringOp::startsWith(error, "unbalanced open paren")) {
119  parseStr += ')';
120  } else if (allowComplete && StringOp::startsWith(error, "missing operand")) {
121  // This also triggers for a (wrong) expression like
122  // 'if { / 3'
123  // and that can't be solved by adding something at the
124  // end. Without the retryCount stuff we would get in an
125  // infinte loop here.
126  parseStr += '0';
127  } else if (allowComplete && StringOp::startsWith(error, "missing )")) {
128  parseStr += ')';
129  } else {
130  DEBUG_PRINT("ERROR: " + parseStr + ": " + error);
131  setColors(parseStr.data(), int(parseStr.size()), 'E');
132  if ((offset + size) < int(colors.size())) last.pop_back();
133  return;
134  }
135  }
136 
137  if (type == EXPRESSION) {
138  DEBUG_PRINT("EXPRESSION: " + parseStr);
139  } else {
140  if (parseInfo.commentSize) {
141  DEBUG_PRINT("COMMENT: " + std::string_view(parseInfo.commentStart, parseInfo.commentSize));
142  setColors(parseInfo.commentStart, parseInfo.commentSize, 'c');
143  }
144  DEBUG_PRINT("COMMAND: " + std::string_view(parseInfo.commandStart, parseInfo.commandSize));
145  }
146  printTokens(parseInfo.tokenPtr, parseInfo.numTokens);
147 
148  // If the current sub-command stops before the end of the original
149  // full command, then it's not the last sub-command. Note that
150  // sub-commands can be nested.
151  if ((offset + size) < int(colors.size())) last.pop_back();
152 
153  const char* nextStart = parseInfo.commandStart + parseInfo.commandSize;
154  Tcl_FreeParse(&parseInfo);
155 
156  if (type == COMMAND) {
157  // next command
158  int nextSize = int((parseStr.data() + parseStr.size()) - nextStart);
159  if (nextSize > 0) {
160  parse(nextStart, nextSize, type);
161  }
162  }
163 }
164 
165 void TclParser::printTokens(Tcl_Token* tokens, int numTokens)
166 {
167 #if DEBUG_TCLPARSER
168  ScopedAssign sa(level, level + 1);
169 #endif
170  for (int i = 0; i < numTokens; ) {
171  Tcl_Token& token = tokens[i];
172  std::string_view tokenStr(token.start, token.size);
173  DEBUG_PRINT(type2string(token.type) + " -> " + tokenStr);
174  switch (token.type) {
175  case TCL_TOKEN_VARIABLE:
176  assert(token.numComponents >= 1);
177  setColors(tokens[i + 1].start - 1, tokens[i + 1].size + 1, 'v');
178  break;
179  case TCL_TOKEN_WORD:
180  case TCL_TOKEN_SIMPLE_WORD:
181  if (*token.start == '"') {
182  setColors(token.start, token.size, 'l');
183  }
184  if ((i == 0) && isProc(interp, tokenStr)) {
185  setColors(token.start, token.size, 'p');
186  }
187  break;
188  case TCL_TOKEN_EXPAND_WORD:
189  setColors(token.start, 3, 'o');
190  break;
191  case TCL_TOKEN_OPERATOR:
192  case TCL_TOKEN_BS:
193  setColors(token.start, token.size, 'o');
194  break;
195  case TCL_TOKEN_TEXT:
196  if (isNumber(tokenStr) || (*token.start == '"')) {
197  // TODO only works if the same as 'l'
198  setColors(token.start, token.size, 'l');
199  }
200  break;
201  }
202  if (token.type == TCL_TOKEN_COMMAND) {
203  parse(token.start + 1, token.size - 2, COMMAND);
204  } else if (token.type == TCL_TOKEN_SIMPLE_WORD) {
205  ParseType subType = guessSubType(tokens, i);
206  if (subType != OTHER) {
207  parse(tokens[i + 1].start, tokens[i + 1].size, subType);
208  }
209  }
210  printTokens(&tokens[++i], token.numComponents);
211  i += token.numComponents;
212  }
213 }
214 
215 TclParser::ParseType TclParser::guessSubType(Tcl_Token* tokens, int i)
216 {
217  // heuristic: if previous token is 'if' then assume this is an expression
218  if ((i >= 1) && (tokens[i - 1].type == TCL_TOKEN_TEXT)) {
219  std::string_view prevText(tokens[i - 1].start, tokens[i - 1].size);
220  if (prevText == one_of("if", "elseif", "expr")) {
221  return EXPRESSION;
222  }
223  }
224 
225  // heuristic: parse text that starts with { as a subcommand
226  if (*tokens[i].start == '{') {
227  return COMMAND;
228  }
229 
230  // a plain text element
231  return OTHER;
232 }
233 
234 bool TclParser::isProc(Tcl_Interp* interp, std::string_view str)
235 {
236  string command = strCat("openmsx::is_command_name {", str, '}');
237  if (Tcl_Eval(interp, command.c_str()) != TCL_OK) return false;
238  int result;
239  if (Tcl_GetBooleanFromObj(interp, Tcl_GetObjResult(interp), &result)
240  != TCL_OK) return false;
241  return result != 0;
242 }
243 
244 void TclParser::setColors(const char* p, int size, char c)
245 {
246  int start = (p - parseStr.data()) + offset;
247  int stop = std::min(start + size, int(colors.size()));
248  for (int i = start; i < stop; ++i) {
249  colors[i] = c;
250  }
251 }
one_of.hh
StringOp::startsWith
bool startsWith(string_view total, string_view part)
Definition: StringOp.cc:33
gl::min
vecN< N, T > min(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:274
ScopedAssign
Assign new value to some variable and restore the original value when this object goes out of scope.
Definition: ScopedAssign.hh:8
utf8::unchecked::size
size_t size(std::string_view utf8)
Definition: utf8_unchecked.hh:227
t
TclObject t
Definition: TclObject_test.cc:264
ranges.hh
ranges::all_of
bool all_of(InputRange &&range, UnaryPredicate pred)
Definition: ranges.hh:119
ScopedAssign.hh
DEBUG_PRINT
#define DEBUG_PRINT(x)
Definition: TclParser.hh:57
TclParser::isProc
static bool isProc(Tcl_Interp *interp, std::string_view str)
Is the given string a valid Tcl command.
Definition: TclParser.cc:234
one_of
Definition: one_of.hh:7
TclParser::TclParser
TclParser(Tcl_Interp *interp, std::string_view input)
Input: Tcl interpreter and command to parse
Definition: TclParser.cc:72
TclParser.hh
StringOp.hh
strCat.hh
DEBUG_TCLPARSER
#define DEBUG_TCLPARSER
Definition: TclParser.hh:9
strCat
std::string strCat(Ts &&...ts)
Definition: strCat.hh:573