openMSX
TclParser.cc
Go to the documentation of this file.
1 #include "TclParser.hh"
2 #include "ScopedAssign.hh"
3 #include "one_of.hh"
4 #include "ranges.hh"
5 #include "strCat.hh"
6 #include <algorithm>
7 #include <iostream>
8 #include <cassert>
9 
10 #if DEBUG_TCLPARSER
11 void TclParser::DEBUG_PRINT(const std::string& s)
12 {
13  std::cout << std::string(2 * level, ' ') << s << '\n';
14 }
15 
16 static constexpr std::string_view type2string(int type)
17 {
18  switch (type) {
19  case TCL_TOKEN_WORD:
20  return "word";
21  case TCL_TOKEN_SIMPLE_WORD:
22  return "simple word";
23  case TCL_TOKEN_EXPAND_WORD:
24  return "expand word";
25  case TCL_TOKEN_TEXT:
26  return "text";
27  case TCL_TOKEN_BS:
28  return "bs";
29  case TCL_TOKEN_COMMAND:
30  return "command";
31  case TCL_TOKEN_VARIABLE:
32  return "variable";
33  case TCL_TOKEN_SUB_EXPR:
34  return "sub expr";
35  case TCL_TOKEN_OPERATOR:
36  return "operator";
37  default:
38  assert(false);
39  return {};
40  }
41 }
42 #endif
43 
// Returns true iff 'low <= c <= high' (expects 'low <= high').
// Uses a single unsigned comparison: when 'c' lies below 'low' the
// subtraction wraps around to a huge unsigned value, so the test fails.
static constexpr bool inRange(char c, char low, char high)
{
    const unsigned distance = c - low;
    const unsigned width = high - low;
    return distance <= width;
}

// Is 'c' one of the characters '0'..'9'?
static constexpr bool isDecDigit(char c)
{
    return inRange(c, '0', '9');
}

// Is 'c' one of the characters '0'..'9', 'a'..'f' or 'A'..'F'?
static constexpr bool isHexDigit(char c)
{
    return inRange(c, '0', '9') ||
           inRange(c, 'a', 'f') ||
           inRange(c, 'A', 'F');
}

// Does 'str' look like an integer literal?
// Accepted syntax: an optional sign ('-' or '+'), an optional hexadecimal
// prefix ("0x" or "0X"), followed by one or more (hexa)decimal digits.
// A string without any digit (e.g. "", "-" or "0x") is NOT a number.
static bool isNumber(std::string_view str)
{
    // optional sign
    if (!str.empty() && ((str.front() == '-') || (str.front() == '+'))) {
        str.remove_prefix(1);
    }

    // optional hexadecimal prefix
    const bool hex = (str.size() >= 2) && (str[0] == '0') &&
                     ((str[1] == 'x') || (str[1] == 'X'));
    if (hex) {
        str.remove_prefix(2);
    }

    // all_of() is true for an empty range, so explicitly require at least
    // one digit; otherwise a lone sign or prefix would count as a number.
    if (str.empty()) return false;

    return hex ? std::all_of(str.begin(), str.end(), isHexDigit)
               : std::all_of(str.begin(), str.end(), isDecDigit);
}
67 
68 
69 TclParser::TclParser(Tcl_Interp* interp_, std::string_view input)
70  : interp(interp_)
71  , colors(input.size(), '.')
72  , parseStr(input)
73  , offset(0)
75  , level(0)
76 #endif
77 {
78  parse(parseStr.data(), int(parseStr.size()), COMMAND);
79 }
80 
81 void TclParser::parse(const char* p, int size, ParseType type)
82 {
83  ScopedAssign<int> sa1(offset, offset + (p - parseStr.data()));
84  ScopedAssign sa2(parseStr, std::string(p, size));
85  last.push_back(offset);
86 
87  // The functions Tcl_ParseCommand() and Tcl_ParseExpr() are meant to
88  // operate on a complete command. For interactive syntax highlighting
89  // we also want to pass incomplete commands (e.g. with an opening, but
90  // not yet a closing brace). This loop tries to parse and depending on
91  // the parse error retries with a completed command.
92  Tcl_Parse parseInfo;
93  int retryCount = 0;
94  while (true) {
95  int parseStatus = (type == EXPRESSION)
96  ? Tcl_ParseExpr(interp, parseStr.data(), int(parseStr.size()), &parseInfo)
97  : Tcl_ParseCommand(interp, parseStr.data(), int(parseStr.size()), 1, &parseInfo);
98  if (parseStatus == TCL_OK) break;
99  Tcl_FreeParse(&parseInfo);
100  ++retryCount;
101 
102  bool allowComplete = ((offset + parseStr.size()) >= colors.size()) &&
103  (retryCount < 10);
104  Tcl_Obj* resObj = Tcl_GetObjResult(interp);
105  int resLen;
106  const char* resStr = Tcl_GetStringFromObj(resObj, &resLen);
107  std::string_view error(resStr, resLen);
108 
109  if (allowComplete && error.starts_with("missing close-brace")) {
110  parseStr += '}';
111  } else if (allowComplete && error.starts_with("missing close-bracket")) {
112  parseStr += ']';
113  } else if (allowComplete && error.starts_with("missing \"")) {
114  parseStr += '"';
115  } else if (allowComplete && error.starts_with("unbalanced open paren")) {
116  parseStr += ')';
117  } else if (allowComplete && error.starts_with("missing operand")) {
118  // This also triggers for a (wrong) expression like
119  // 'if { / 3'
120  // and that can't be solved by adding something at the
121  // end. Without the retryCount stuff we would get in an
122  // infinite loop here.
123  parseStr += '0';
124  } else if (allowComplete && error.starts_with("missing )")) {
125  parseStr += ')';
126  } else {
127  DEBUG_PRINT("ERROR: " + parseStr + ": " + error);
128  setColors(parseStr.data(), int(parseStr.size()), 'E');
129  if ((offset + size) < int(colors.size())) last.pop_back();
130  return;
131  }
132  }
133 
134  if (type == EXPRESSION) {
135  DEBUG_PRINT("EXPRESSION: " + parseStr);
136  } else {
137  if (parseInfo.commentSize) {
138  DEBUG_PRINT("COMMENT: " + std::string_view(parseInfo.commentStart, parseInfo.commentSize));
139  setColors(parseInfo.commentStart, parseInfo.commentSize, 'c');
140  }
141  DEBUG_PRINT("COMMAND: " + std::string_view(parseInfo.commandStart, parseInfo.commandSize));
142  }
143  printTokens(parseInfo.tokenPtr, parseInfo.numTokens);
144 
145  // If the current sub-command stops before the end of the original
146  // full command, then it's not the last sub-command. Note that
147  // sub-commands can be nested.
148  if ((offset + size) < int(colors.size())) last.pop_back();
149 
150  const char* nextStart = parseInfo.commandStart + parseInfo.commandSize;
151  Tcl_FreeParse(&parseInfo);
152 
153  if (type == COMMAND) {
154  // next command
155  int nextSize = int((parseStr.data() + parseStr.size()) - nextStart);
156  if (nextSize > 0) {
157  parse(nextStart, nextSize, type);
158  }
159  }
160 }
161 
162 void TclParser::printTokens(Tcl_Token* tokens, int numTokens)
163 {
164 #if DEBUG_TCLPARSER
165  ScopedAssign sa(level, level + 1);
166 #endif
167  for (int i = 0; i < numTokens; ) {
168  Tcl_Token& token = tokens[i];
169  std::string_view tokenStr(token.start, token.size);
170  DEBUG_PRINT(type2string(token.type) + " -> " + tokenStr);
171  switch (token.type) {
172  case TCL_TOKEN_VARIABLE:
173  assert(token.numComponents >= 1);
174  setColors(tokens[i + 1].start - 1, tokens[i + 1].size + 1, 'v');
175  break;
176  case TCL_TOKEN_WORD:
177  case TCL_TOKEN_SIMPLE_WORD:
178  if (*token.start == '"') {
179  setColors(token.start, token.size, 'l');
180  }
181  if ((i == 0) && isProc(interp, tokenStr)) {
182  setColors(token.start, token.size, 'p');
183  }
184  break;
185  case TCL_TOKEN_EXPAND_WORD:
186  setColors(token.start, 3, 'o');
187  break;
188  case TCL_TOKEN_OPERATOR:
189  case TCL_TOKEN_BS:
190  setColors(token.start, token.size, 'o');
191  break;
192  case TCL_TOKEN_TEXT:
193  if (isNumber(tokenStr) || (*token.start == '"')) {
194  // TODO only works if the same as 'l'
195  setColors(token.start, token.size, 'l');
196  }
197  break;
198  }
199  if (token.type == TCL_TOKEN_COMMAND) {
200  parse(token.start + 1, token.size - 2, COMMAND);
201  } else if (token.type == TCL_TOKEN_SIMPLE_WORD) {
202  ParseType subType = guessSubType(tokens, i);
203  if (subType != OTHER) {
204  parse(tokens[i + 1].start, tokens[i + 1].size, subType);
205  }
206  }
207  printTokens(&tokens[++i], token.numComponents);
208  i += token.numComponents;
209  }
210 }
211 
212 TclParser::ParseType TclParser::guessSubType(Tcl_Token* tokens, int i)
213 {
214  // heuristic: if previous token is 'if' then assume this is an expression
215  if ((i >= 1) && (tokens[i - 1].type == TCL_TOKEN_TEXT)) {
216  std::string_view prevText(tokens[i - 1].start, tokens[i - 1].size);
217  if (prevText == one_of("if", "elseif", "expr")) {
218  return EXPRESSION;
219  }
220  }
221 
222  // heuristic: parse text that starts with { as a subcommand
223  if (*tokens[i].start == '{') {
224  return COMMAND;
225  }
226 
227  // a plain text element
228  return OTHER;
229 }
230 
231 bool TclParser::isProc(Tcl_Interp* interp, std::string_view str)
232 {
233  auto command = tmpStrCat("openmsx::is_command_name {", str, '}');
234  if (Tcl_Eval(interp, command.c_str()) != TCL_OK) return false;
235  int result;
236  if (Tcl_GetBooleanFromObj(interp, Tcl_GetObjResult(interp), &result)
237  != TCL_OK) return false;
238  return result != 0;
239 }
240 
241 void TclParser::setColors(const char* p, int size, char c)
242 {
243  int start = (p - parseStr.data()) + offset;
244  int stop = std::min(start + size, int(colors.size()));
245  for (auto i : xrange(start, stop)) {
246  colors[i] = c;
247  }
248 }
TclObject t
#define DEBUG_TCLPARSER
Definition: TclParser.hh:9
#define DEBUG_PRINT(x)
Definition: TclParser.hh:57
Assign new value to some variable and restore the original value when this object goes out of scope.
Definition: ScopedAssign.hh:8
TclParser(Tcl_Interp *interp, std::string_view input)
Input: Tcl interpreter and command to parse
Definition: TclParser.cc:69
static bool isProc(Tcl_Interp *interp, std::string_view str)
Is the given string a valid Tcl command.
Definition: TclParser.cc:231
Definition: one_of.hh:7
constexpr vecN< N, T > min(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:258
bool all_of(InputRange &&range, UnaryPredicate pred)
Definition: ranges.hh:163
size_t size(std::string_view utf8)
TemporaryString tmpStrCat(Ts &&... ts)
Definition: strCat.hh:617
constexpr auto xrange(T e)
Definition: xrange.hh:133