openMSX
TclParser.cc
Go to the documentation of this file.
1 #include "TclParser.hh"
2 #include "ScopedAssign.hh"
3 #include "ranges.hh"
4 #include "strCat.hh"
5 #include <algorithm>
6 #include <iostream>
7 #include <cassert>
8 
9 using std::string;
10 
11 #if DEBUG_TCLPARSER
12 void TclParser::DEBUG_PRINT(const string& s)
13 {
14  std::cout << string(2 * level, ' ') << s << '\n';
15 }
16 
17 static string_view type2string(int type)
18 {
19  switch (type) {
20  case TCL_TOKEN_WORD:
21  return "word";
22  case TCL_TOKEN_SIMPLE_WORD:
23  return "simple word";
24  case TCL_TOKEN_EXPAND_WORD:
25  return "expand word";
26  case TCL_TOKEN_TEXT:
27  return "text";
28  case TCL_TOKEN_BS:
29  return "bs";
30  case TCL_TOKEN_COMMAND:
31  return "command";
32  case TCL_TOKEN_VARIABLE:
33  return "variable";
34  case TCL_TOKEN_SUB_EXPR:
35  return "sub expr";
36  case TCL_TOKEN_OPERATOR:
37  return "operator";
38  default:
39  assert(false);
40  return {};
41  }
42 }
43 #endif
44 
/** Is 'c' inside the closed interval [low, high]?
  * Uses a single unsigned comparison: when 'c' is below 'low' the
  * subtraction wraps around to a large unsigned value, which is then
  * rejected by the same test that rejects values above 'high'.
  */
static bool inRange(char c, char low, char high)
{
	const unsigned width = unsigned(high - low);
	const unsigned delta = c - low;
	return !(delta > width);
}
50 
51 static bool isNumber(string_view str)
52 {
53  if (str.starts_with('-') || str.starts_with('+')) {
54  str.pop_front();
55  }
56  if (str.starts_with("0x") || str.starts_with("0X")) {
57  str.remove_prefix(2);
58  return ranges::all_of(str, [](char c) {
59  return inRange(c, '0', '9') ||
60  inRange(c, 'a', 'f') ||
61  inRange(c, 'A', 'F');
62  });
63  } else {
64  return ranges::all_of(str,
65  [](char c) { return inRange(c, '0', '9'); });
66  }
67 }
68 
69 
70 TclParser::TclParser(Tcl_Interp* interp_, string_view input)
71  : interp(interp_)
72  , colors(input.size(), '.')
73  , parseStr(input.str())
74  , offset(0)
76  , level(0)
77 #endif
78 {
79  parse(parseStr.data(), int(parseStr.size()), COMMAND);
80 }
81 
82 void TclParser::parse(const char* p, int size, ParseType type)
83 {
84  ScopedAssign<int> sa1(offset, offset + (p - parseStr.data()));
85  ScopedAssign<string> sa2(parseStr, string(p, size));
86  last.push_back(offset);
87 
88  // The functions Tcl_ParseCommand() and Tcl_ParseExpr() are meant to
89  // operate on a complete command. For interactive syntax highlighting
90  // we also want to pass incomplete commands (e.g. with an opening, but
91  // not yet a closing brace). This loop tries to parse and depening on
92  // the parse error retries with a completed command.
93  Tcl_Parse parseInfo;
94  int retryCount = 0;
95  while (true) {
96  int parseStatus = (type == EXPRESSION)
97  ? Tcl_ParseExpr(interp, parseStr.data(), int(parseStr.size()), &parseInfo)
98  : Tcl_ParseCommand(interp, parseStr.data(), int(parseStr.size()), 1, &parseInfo);
99  if (parseStatus == TCL_OK) break;
100  Tcl_FreeParse(&parseInfo);
101  ++retryCount;
102 
103  bool allowComplete = ((offset + parseStr.size()) >= colors.size()) &&
104  (retryCount < 10);
105  Tcl_Obj* resObj = Tcl_GetObjResult(interp);
106  int resLen;
107  const char* resStr = Tcl_GetStringFromObj(resObj, &resLen);
108  string_view error(resStr, resLen);
109 
110  if (allowComplete && error.starts_with("missing close-brace")) {
111  parseStr += '}';
112  } else if (allowComplete && error.starts_with("missing close-bracket")) {
113  parseStr += ']';
114  } else if (allowComplete && error.starts_with( "missing \"")) {
115  parseStr += '"';
116  } else if (allowComplete && error.starts_with("unbalanced open paren")) {
117  parseStr += ')';
118  } else if (allowComplete && error.starts_with("missing operand")) {
119  // This also triggers for a (wrong) expression like
120  // 'if { / 3'
121  // and that can't be solved by adding something at the
122  // end. Without the retryCount stuff we would get in an
123  // infinte loop here.
124  parseStr += '0';
125  } else if (allowComplete && error.starts_with("missing )")) {
126  parseStr += ')';
127  } else {
128  DEBUG_PRINT("ERROR: " + parseStr + ": " + error);
129  setColors(parseStr.data(), int(parseStr.size()), 'E');
130  if ((offset + size) < int(colors.size())) last.pop_back();
131  return;
132  }
133  }
134 
135  if (type == EXPRESSION) {
136  DEBUG_PRINT("EXPRESSION: " + parseStr);
137  } else {
138  if (parseInfo.commentSize) {
139  DEBUG_PRINT("COMMENT: " + string_view(parseInfo.commentStart, parseInfo.commentSize));
140  setColors(parseInfo.commentStart, parseInfo.commentSize, 'c');
141  }
142  DEBUG_PRINT("COMMAND: " + string_view(parseInfo.commandStart, parseInfo.commandSize));
143  }
144  printTokens(parseInfo.tokenPtr, parseInfo.numTokens);
145 
146  // If the current sub-command stops before the end of the original
147  // full command, then it's not the last sub-command. Note that
148  // sub-commands can be nested.
149  if ((offset + size) < int(colors.size())) last.pop_back();
150 
151  const char* nextStart = parseInfo.commandStart + parseInfo.commandSize;
152  Tcl_FreeParse(&parseInfo);
153 
154  if (type == COMMAND) {
155  // next command
156  int nextSize = int((parseStr.data() + parseStr.size()) - nextStart);
157  if (nextSize > 0) {
158  parse(nextStart, nextSize, type);
159  }
160  }
161 }
162 
163 void TclParser::printTokens(Tcl_Token* tokens, int numTokens)
164 {
165 #if DEBUG_TCLPARSER
166  ScopedAssign<int> sa(level, level + 1);
167 #endif
168  for (int i = 0; i < numTokens; ) {
169  Tcl_Token& token = tokens[i];
170  string_view tokenStr(token.start, token.size);
171  DEBUG_PRINT(type2string(token.type) + " -> " + tokenStr);
172  switch (token.type) {
173  case TCL_TOKEN_VARIABLE:
174  assert(token.numComponents >= 1);
175  setColors(tokens[i + 1].start - 1, tokens[i + 1].size + 1, 'v');
176  break;
177  case TCL_TOKEN_WORD:
178  case TCL_TOKEN_SIMPLE_WORD:
179  if (*token.start == '"') {
180  setColors(token.start, token.size, 'l');
181  }
182  if ((i == 0) && isProc(interp, tokenStr)) {
183  setColors(token.start, token.size, 'p');
184  }
185  break;
186  case TCL_TOKEN_EXPAND_WORD:
187  setColors(token.start, 3, 'o');
188  break;
189  case TCL_TOKEN_OPERATOR:
190  case TCL_TOKEN_BS:
191  setColors(token.start, token.size, 'o');
192  break;
193  case TCL_TOKEN_TEXT:
194  if (isNumber(tokenStr) || (*token.start == '"')) {
195  // TODO only works if the same as 'l'
196  setColors(token.start, token.size, 'l');
197  }
198  break;
199  }
200  if (token.type == TCL_TOKEN_COMMAND) {
201  parse(token.start + 1, token.size - 2, COMMAND);
202  } else if (token.type == TCL_TOKEN_SIMPLE_WORD) {
203  ParseType subType = guessSubType(tokens, i);
204  if (subType != OTHER) {
205  parse(tokens[i + 1].start, tokens[i + 1].size, subType);
206  }
207  }
208  printTokens(&tokens[++i], token.numComponents);
209  i += token.numComponents;
210  }
211 }
212 
213 TclParser::ParseType TclParser::guessSubType(Tcl_Token* tokens, int i)
214 {
215  // heuristic: if previous token is 'if' then assume this is an expression
216  if ((i >= 1) && (tokens[i - 1].type == TCL_TOKEN_TEXT)) {
217  string_view prevText(tokens[i - 1].start, tokens[i - 1].size);
218  if ((prevText == "if") ||
219  (prevText == "elseif") ||
220  (prevText == "expr")) {
221  return EXPRESSION;
222  }
223  }
224 
225  // heuristic: parse text that starts with { as a subcommand
226  if (*tokens[i].start == '{') {
227  return COMMAND;
228  }
229 
230  // a plain text element
231  return OTHER;
232 }
233 
234 bool TclParser::isProc(Tcl_Interp* interp, string_view str)
235 {
236  string command = strCat("openmsx::is_command_name {", str, '}');
237  if (Tcl_Eval(interp, command.c_str()) != TCL_OK) return false;
238  int result;
239  if (Tcl_GetBooleanFromObj(interp, Tcl_GetObjResult(interp), &result)
240  != TCL_OK) return false;
241  return result != 0;
242 }
243 
244 void TclParser::setColors(const char* p, int size, char c)
245 {
246  int start = (p - parseStr.data()) + offset;
247  int stop = std::min(start + size, int(colors.size()));
248  for (int i = start; i < stop; ++i) {
249  colors[i] = c;
250  }
251 }
#define DEBUG_TCLPARSER
Definition: TclParser.hh:8
bool starts_with(string_view x) const
Definition: string_view.cc:116
vecN< N, T > min(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:269
#define DEBUG_PRINT(x)
Definition: TclParser.hh:56
void pop_front()
Definition: string_view.hh:75
void remove_prefix(size_type n)
Definition: string_view.hh:65
TclParser(Tcl_Interp *interp, string_view input)
Input: Tcl interpreter and command to parse.
Definition: TclParser.cc:70
This class implements a (close approximation) of the std::string_view class.
Definition: string_view.hh:16
static bool isProc(Tcl_Interp *interp, string_view str)
Is the given string a valid Tcl command.
Definition: TclParser.cc:234
bool all_of(InputRange &&range, UnaryPredicate pred)
Definition: ranges.hh:119
std::string strCat(Ts &&...ts)
Definition: strCat.hh:577
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.hh:62
TclObject t
Assign new value to some variable and restore the original value when this object goes out of scope...
Definition: ScopedAssign.hh:7