openMSX
TclParser.cc
Go to the documentation of this file.
1 #include "TclParser.hh"
2 #include "ScopedAssign.hh"
3 #include "ranges.hh"
4 #include "strCat.hh"
5 #include "StringOp.hh"
6 #include <algorithm>
7 #include <iostream>
8 #include <cassert>
9 
10 using std::string;
11 
12 #if DEBUG_TCLPARSER
13 void TclParser::DEBUG_PRINT(const string& s)
14 {
15  std::cout << string(2 * level, ' ') << s << '\n';
16 }
17 
18 static std::string_view type2string(int type)
19 {
20  switch (type) {
21  case TCL_TOKEN_WORD:
22  return "word";
23  case TCL_TOKEN_SIMPLE_WORD:
24  return "simple word";
25  case TCL_TOKEN_EXPAND_WORD:
26  return "expand word";
27  case TCL_TOKEN_TEXT:
28  return "text";
29  case TCL_TOKEN_BS:
30  return "bs";
31  case TCL_TOKEN_COMMAND:
32  return "command";
33  case TCL_TOKEN_VARIABLE:
34  return "variable";
35  case TCL_TOKEN_SUB_EXPR:
36  return "sub expr";
37  case TCL_TOKEN_OPERATOR:
38  return "operator";
39  default:
40  assert(false);
41  return {};
42  }
43 }
44 #endif
45 
// Is 'c' inside the closed interval [low, high]?
//
// Uses a single unsigned comparison: when 'c' is below 'low' the difference
// is negative and wraps around to a large unsigned value, which then fails
// the '<=' test.
static bool inRange(char c, char low, char high)
{
	unsigned t = c - low;
	return t <= unsigned(high - low);
}

// Does 'str' look like an integer literal?
//
// Accepted forms:
//   - an optional leading '-' or '+'
//   - followed by either decimal digits, or "0x"/"0X" plus hexadecimal digits
//
// At least one digit is required, so the empty string, a lone sign and a bare
// "0x" prefix are NOT numbers.
static bool isNumber(std::string_view str)
{
	// Strip the optional sign.
	if (!str.empty() && ((str.front() == '-') || (str.front() == '+'))) {
		str.remove_prefix(1);
	}

	// Strip the optional hexadecimal prefix.
	const bool hex = (str.size() >= 2) && (str[0] == '0') &&
	                 ((str[1] == 'x') || (str[1] == 'X'));
	if (hex) {
		str.remove_prefix(2);
	}

	// all_of() is vacuously true on an empty range, so explicitly reject
	// input without any digits.
	if (str.empty()) return false;

	return std::all_of(str.begin(), str.end(), [hex](char c) {
		if (inRange(c, '0', '9')) return true;
		return hex && (inRange(c, 'a', 'f') || inRange(c, 'A', 'F'));
	});
}
69 
70 
71 TclParser::TclParser(Tcl_Interp* interp_, std::string_view input)
72  : interp(interp_)
73  , colors(input.size(), '.')
74  , parseStr(input)
75  , offset(0)
77  , level(0)
78 #endif
79 {
80  parse(parseStr.data(), int(parseStr.size()), COMMAND);
81 }
82 
83 void TclParser::parse(const char* p, int size, ParseType type)
84 {
85  ScopedAssign<int> sa1(offset, offset + (p - parseStr.data()));
86  ScopedAssign sa2(parseStr, string(p, size));
87  last.push_back(offset);
88 
89  // The functions Tcl_ParseCommand() and Tcl_ParseExpr() are meant to
90  // operate on a complete command. For interactive syntax highlighting
91  // we also want to pass incomplete commands (e.g. with an opening, but
92  // not yet a closing brace). This loop tries to parse and depening on
93  // the parse error retries with a completed command.
94  Tcl_Parse parseInfo;
95  int retryCount = 0;
96  while (true) {
97  int parseStatus = (type == EXPRESSION)
98  ? Tcl_ParseExpr(interp, parseStr.data(), int(parseStr.size()), &parseInfo)
99  : Tcl_ParseCommand(interp, parseStr.data(), int(parseStr.size()), 1, &parseInfo);
100  if (parseStatus == TCL_OK) break;
101  Tcl_FreeParse(&parseInfo);
102  ++retryCount;
103 
104  bool allowComplete = ((offset + parseStr.size()) >= colors.size()) &&
105  (retryCount < 10);
106  Tcl_Obj* resObj = Tcl_GetObjResult(interp);
107  int resLen;
108  const char* resStr = Tcl_GetStringFromObj(resObj, &resLen);
109  std::string_view error(resStr, resLen);
110 
111  if (allowComplete && StringOp::startsWith(error, "missing close-brace")) {
112  parseStr += '}';
113  } else if (allowComplete && StringOp::startsWith(error, "missing close-bracket")) {
114  parseStr += ']';
115  } else if (allowComplete && StringOp::startsWith(error, "missing \"")) {
116  parseStr += '"';
117  } else if (allowComplete && StringOp::startsWith(error, "unbalanced open paren")) {
118  parseStr += ')';
119  } else if (allowComplete && StringOp::startsWith(error, "missing operand")) {
120  // This also triggers for a (wrong) expression like
121  // 'if { / 3'
122  // and that can't be solved by adding something at the
123  // end. Without the retryCount stuff we would get in an
124  // infinte loop here.
125  parseStr += '0';
126  } else if (allowComplete && StringOp::startsWith(error, "missing )")) {
127  parseStr += ')';
128  } else {
129  DEBUG_PRINT("ERROR: " + parseStr + ": " + error);
130  setColors(parseStr.data(), int(parseStr.size()), 'E');
131  if ((offset + size) < int(colors.size())) last.pop_back();
132  return;
133  }
134  }
135 
136  if (type == EXPRESSION) {
137  DEBUG_PRINT("EXPRESSION: " + parseStr);
138  } else {
139  if (parseInfo.commentSize) {
140  DEBUG_PRINT("COMMENT: " + std::string_view(parseInfo.commentStart, parseInfo.commentSize));
141  setColors(parseInfo.commentStart, parseInfo.commentSize, 'c');
142  }
143  DEBUG_PRINT("COMMAND: " + std::string_view(parseInfo.commandStart, parseInfo.commandSize));
144  }
145  printTokens(parseInfo.tokenPtr, parseInfo.numTokens);
146 
147  // If the current sub-command stops before the end of the original
148  // full command, then it's not the last sub-command. Note that
149  // sub-commands can be nested.
150  if ((offset + size) < int(colors.size())) last.pop_back();
151 
152  const char* nextStart = parseInfo.commandStart + parseInfo.commandSize;
153  Tcl_FreeParse(&parseInfo);
154 
155  if (type == COMMAND) {
156  // next command
157  int nextSize = int((parseStr.data() + parseStr.size()) - nextStart);
158  if (nextSize > 0) {
159  parse(nextStart, nextSize, type);
160  }
161  }
162 }
163 
164 void TclParser::printTokens(Tcl_Token* tokens, int numTokens)
165 {
166 #if DEBUG_TCLPARSER
167  ScopedAssign sa(level, level + 1);
168 #endif
169  for (int i = 0; i < numTokens; ) {
170  Tcl_Token& token = tokens[i];
171  std::string_view tokenStr(token.start, token.size);
172  DEBUG_PRINT(type2string(token.type) + " -> " + tokenStr);
173  switch (token.type) {
174  case TCL_TOKEN_VARIABLE:
175  assert(token.numComponents >= 1);
176  setColors(tokens[i + 1].start - 1, tokens[i + 1].size + 1, 'v');
177  break;
178  case TCL_TOKEN_WORD:
179  case TCL_TOKEN_SIMPLE_WORD:
180  if (*token.start == '"') {
181  setColors(token.start, token.size, 'l');
182  }
183  if ((i == 0) && isProc(interp, tokenStr)) {
184  setColors(token.start, token.size, 'p');
185  }
186  break;
187  case TCL_TOKEN_EXPAND_WORD:
188  setColors(token.start, 3, 'o');
189  break;
190  case TCL_TOKEN_OPERATOR:
191  case TCL_TOKEN_BS:
192  setColors(token.start, token.size, 'o');
193  break;
194  case TCL_TOKEN_TEXT:
195  if (isNumber(tokenStr) || (*token.start == '"')) {
196  // TODO only works if the same as 'l'
197  setColors(token.start, token.size, 'l');
198  }
199  break;
200  }
201  if (token.type == TCL_TOKEN_COMMAND) {
202  parse(token.start + 1, token.size - 2, COMMAND);
203  } else if (token.type == TCL_TOKEN_SIMPLE_WORD) {
204  ParseType subType = guessSubType(tokens, i);
205  if (subType != OTHER) {
206  parse(tokens[i + 1].start, tokens[i + 1].size, subType);
207  }
208  }
209  printTokens(&tokens[++i], token.numComponents);
210  i += token.numComponents;
211  }
212 }
213 
214 TclParser::ParseType TclParser::guessSubType(Tcl_Token* tokens, int i)
215 {
216  // heuristic: if previous token is 'if' then assume this is an expression
217  if ((i >= 1) && (tokens[i - 1].type == TCL_TOKEN_TEXT)) {
218  std::string_view prevText(tokens[i - 1].start, tokens[i - 1].size);
219  if ((prevText == "if") ||
220  (prevText == "elseif") ||
221  (prevText == "expr")) {
222  return EXPRESSION;
223  }
224  }
225 
226  // heuristic: parse text that starts with { as a subcommand
227  if (*tokens[i].start == '{') {
228  return COMMAND;
229  }
230 
231  // a plain text element
232  return OTHER;
233 }
234 
235 bool TclParser::isProc(Tcl_Interp* interp, std::string_view str)
236 {
237  string command = strCat("openmsx::is_command_name {", str, '}');
238  if (Tcl_Eval(interp, command.c_str()) != TCL_OK) return false;
239  int result;
240  if (Tcl_GetBooleanFromObj(interp, Tcl_GetObjResult(interp), &result)
241  != TCL_OK) return false;
242  return result != 0;
243 }
244 
245 void TclParser::setColors(const char* p, int size, char c)
246 {
247  int start = (p - parseStr.data()) + offset;
248  int stop = std::min(start + size, int(colors.size()));
249  for (int i = start; i < stop; ++i) {
250  colors[i] = c;
251  }
252 }
#define DEBUG_TCLPARSER
Definition: TclParser.hh:9
vecN< N, T > min(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:269
TclParser(Tcl_Interp *interp, std::string_view input)
Input: Tcl interpreter and command to parse.
Definition: TclParser.cc:71
#define DEBUG_PRINT(x)
Definition: TclParser.hh:57
bool startsWith(string_view total, string_view part)
Definition: StringOp.cc:71
size_t size(std::string_view utf8)
static bool isProc(Tcl_Interp *interp, std::string_view str)
Is the given string a valid Tcl command.
Definition: TclParser.cc:235
bool all_of(InputRange &&range, UnaryPredicate pred)
Definition: ranges.hh:119
std::string strCat(Ts &&...ts)
Definition: strCat.hh:573
TclObject t
Assign new value to some variable and restore the original value when this object goes out of scope...
Definition: ScopedAssign.hh:7