openMSX
TclParser.cc
Go to the documentation of this file.
1 #include "TclParser.hh"
2 #include "ScopedAssign.hh"
3 #include "one_of.hh"
4 #include "ranges.hh"
5 #include "strCat.hh"
6 #include "StringOp.hh"
7 #include <algorithm>
8 #include <iostream>
9 #include <cassert>
10 
11 #if DEBUG_TCLPARSER
12 void TclParser::DEBUG_PRINT(const std::string& s)
13 {
14  std::cout << std::string(2 * level, ' ') << s << '\n';
15 }
16 
17 static constexpr std::string_view type2string(int type)
18 {
19  switch (type) {
20  case TCL_TOKEN_WORD:
21  return "word";
22  case TCL_TOKEN_SIMPLE_WORD:
23  return "simple word";
24  case TCL_TOKEN_EXPAND_WORD:
25  return "expand word";
26  case TCL_TOKEN_TEXT:
27  return "text";
28  case TCL_TOKEN_BS:
29  return "bs";
30  case TCL_TOKEN_COMMAND:
31  return "command";
32  case TCL_TOKEN_VARIABLE:
33  return "variable";
34  case TCL_TOKEN_SUB_EXPR:
35  return "sub expr";
36  case TCL_TOKEN_OPERATOR:
37  return "operator";
38  default:
39  assert(false);
40  return {};
41  }
42 }
43 #endif
44 
// Is 'c' inside the closed interval [low, high]?
// Implemented with a single unsigned comparison: characters below 'low'
// wrap around to a very large unsigned distance and are thus rejected.
static constexpr bool inRange(char c, char low, char high)
{
	const unsigned distance = c - low;
	const unsigned span = unsigned(high - low);
	return !(distance > span);
}
50 
51 static bool isNumber(std::string_view str)
52 {
53  if (StringOp::startsWith(str, '-') || StringOp::startsWith(str, '+')) {
54  str.remove_prefix(1);
55  }
56  if (StringOp::startsWith(str, "0x") || StringOp::startsWith(str, "0X")) {
57  str.remove_prefix(2);
58  return ranges::all_of(str, [](char c) {
59  return inRange(c, '0', '9') ||
60  inRange(c, 'a', 'f') ||
61  inRange(c, 'A', 'F');
62  });
63  } else {
64  return ranges::all_of(str,
65  [](char c) { return inRange(c, '0', '9'); });
66  }
67 }
68 
69 
70 TclParser::TclParser(Tcl_Interp* interp_, std::string_view input)
71  : interp(interp_)
72  , colors(input.size(), '.')
73  , parseStr(input)
74  , offset(0)
76  , level(0)
77 #endif
78 {
79  parse(parseStr.data(), int(parseStr.size()), COMMAND);
80 }
81 
82 void TclParser::parse(const char* p, int size, ParseType type)
83 {
84  ScopedAssign<int> sa1(offset, offset + (p - parseStr.data()));
85  ScopedAssign sa2(parseStr, std::string(p, size));
86  last.push_back(offset);
87 
88  // The functions Tcl_ParseCommand() and Tcl_ParseExpr() are meant to
89  // operate on a complete command. For interactive syntax highlighting
90  // we also want to pass incomplete commands (e.g. with an opening, but
91  // not yet a closing brace). This loop tries to parse and depending on
92  // the parse error retries with a completed command.
93  Tcl_Parse parseInfo;
94  int retryCount = 0;
95  while (true) {
96  int parseStatus = (type == EXPRESSION)
97  ? Tcl_ParseExpr(interp, parseStr.data(), int(parseStr.size()), &parseInfo)
98  : Tcl_ParseCommand(interp, parseStr.data(), int(parseStr.size()), 1, &parseInfo);
99  if (parseStatus == TCL_OK) break;
100  Tcl_FreeParse(&parseInfo);
101  ++retryCount;
102 
103  bool allowComplete = ((offset + parseStr.size()) >= colors.size()) &&
104  (retryCount < 10);
105  Tcl_Obj* resObj = Tcl_GetObjResult(interp);
106  int resLen;
107  const char* resStr = Tcl_GetStringFromObj(resObj, &resLen);
108  std::string_view error(resStr, resLen);
109 
110  if (allowComplete && StringOp::startsWith(error, "missing close-brace")) {
111  parseStr += '}';
112  } else if (allowComplete && StringOp::startsWith(error, "missing close-bracket")) {
113  parseStr += ']';
114  } else if (allowComplete && StringOp::startsWith(error, "missing \"")) {
115  parseStr += '"';
116  } else if (allowComplete && StringOp::startsWith(error, "unbalanced open paren")) {
117  parseStr += ')';
118  } else if (allowComplete && StringOp::startsWith(error, "missing operand")) {
119  // This also triggers for a (wrong) expression like
120  // 'if { / 3'
121  // and that can't be solved by adding something at the
122  // end. Without the retryCount stuff we would get in an
123  // infinite loop here.
124  parseStr += '0';
125  } else if (allowComplete && StringOp::startsWith(error, "missing )")) {
126  parseStr += ')';
127  } else {
128  DEBUG_PRINT("ERROR: " + parseStr + ": " + error);
129  setColors(parseStr.data(), int(parseStr.size()), 'E');
130  if ((offset + size) < int(colors.size())) last.pop_back();
131  return;
132  }
133  }
134 
135  if (type == EXPRESSION) {
136  DEBUG_PRINT("EXPRESSION: " + parseStr);
137  } else {
138  if (parseInfo.commentSize) {
139  DEBUG_PRINT("COMMENT: " + std::string_view(parseInfo.commentStart, parseInfo.commentSize));
140  setColors(parseInfo.commentStart, parseInfo.commentSize, 'c');
141  }
142  DEBUG_PRINT("COMMAND: " + std::string_view(parseInfo.commandStart, parseInfo.commandSize));
143  }
144  printTokens(parseInfo.tokenPtr, parseInfo.numTokens);
145 
146  // If the current sub-command stops before the end of the original
147  // full command, then it's not the last sub-command. Note that
148  // sub-commands can be nested.
149  if ((offset + size) < int(colors.size())) last.pop_back();
150 
151  const char* nextStart = parseInfo.commandStart + parseInfo.commandSize;
152  Tcl_FreeParse(&parseInfo);
153 
154  if (type == COMMAND) {
155  // next command
156  int nextSize = int((parseStr.data() + parseStr.size()) - nextStart);
157  if (nextSize > 0) {
158  parse(nextStart, nextSize, type);
159  }
160  }
161 }
162 
163 void TclParser::printTokens(Tcl_Token* tokens, int numTokens)
164 {
165 #if DEBUG_TCLPARSER
166  ScopedAssign sa(level, level + 1);
167 #endif
168  for (int i = 0; i < numTokens; ) {
169  Tcl_Token& token = tokens[i];
170  std::string_view tokenStr(token.start, token.size);
171  DEBUG_PRINT(type2string(token.type) + " -> " + tokenStr);
172  switch (token.type) {
173  case TCL_TOKEN_VARIABLE:
174  assert(token.numComponents >= 1);
175  setColors(tokens[i + 1].start - 1, tokens[i + 1].size + 1, 'v');
176  break;
177  case TCL_TOKEN_WORD:
178  case TCL_TOKEN_SIMPLE_WORD:
179  if (*token.start == '"') {
180  setColors(token.start, token.size, 'l');
181  }
182  if ((i == 0) && isProc(interp, tokenStr)) {
183  setColors(token.start, token.size, 'p');
184  }
185  break;
186  case TCL_TOKEN_EXPAND_WORD:
187  setColors(token.start, 3, 'o');
188  break;
189  case TCL_TOKEN_OPERATOR:
190  case TCL_TOKEN_BS:
191  setColors(token.start, token.size, 'o');
192  break;
193  case TCL_TOKEN_TEXT:
194  if (isNumber(tokenStr) || (*token.start == '"')) {
195  // TODO only works if the same as 'l'
196  setColors(token.start, token.size, 'l');
197  }
198  break;
199  }
200  if (token.type == TCL_TOKEN_COMMAND) {
201  parse(token.start + 1, token.size - 2, COMMAND);
202  } else if (token.type == TCL_TOKEN_SIMPLE_WORD) {
203  ParseType subType = guessSubType(tokens, i);
204  if (subType != OTHER) {
205  parse(tokens[i + 1].start, tokens[i + 1].size, subType);
206  }
207  }
208  printTokens(&tokens[++i], token.numComponents);
209  i += token.numComponents;
210  }
211 }
212 
213 TclParser::ParseType TclParser::guessSubType(Tcl_Token* tokens, int i)
214 {
215  // heuristic: if previous token is 'if' then assume this is an expression
216  if ((i >= 1) && (tokens[i - 1].type == TCL_TOKEN_TEXT)) {
217  std::string_view prevText(tokens[i - 1].start, tokens[i - 1].size);
218  if (prevText == one_of("if", "elseif", "expr")) {
219  return EXPRESSION;
220  }
221  }
222 
223  // heuristic: parse text that starts with { as a subcommand
224  if (*tokens[i].start == '{') {
225  return COMMAND;
226  }
227 
228  // a plain text element
229  return OTHER;
230 }
231 
232 bool TclParser::isProc(Tcl_Interp* interp, std::string_view str)
233 {
234  auto command = tmpStrCat("openmsx::is_command_name {", str, '}');
235  if (Tcl_Eval(interp, command.c_str()) != TCL_OK) return false;
236  int result;
237  if (Tcl_GetBooleanFromObj(interp, Tcl_GetObjResult(interp), &result)
238  != TCL_OK) return false;
239  return result != 0;
240 }
241 
242 void TclParser::setColors(const char* p, int size, char c)
243 {
244  int start = (p - parseStr.data()) + offset;
245  int stop = std::min(start + size, int(colors.size()));
246  for (auto i : xrange(start, stop)) {
247  colors[i] = c;
248  }
249 }
TclObject t
#define DEBUG_TCLPARSER
Definition: TclParser.hh:9
#define DEBUG_PRINT(x)
Definition: TclParser.hh:57
Assign new value to some variable and restore the original value when this object goes out of scope.
Definition: ScopedAssign.hh:8
TclParser(Tcl_Interp *interp, std::string_view input)
Input: Tcl interpreter and command to parse
Definition: TclParser.cc:70
static bool isProc(Tcl_Interp *interp, std::string_view str)
Is the given string a valid Tcl command.
Definition: TclParser.cc:232
Definition: one_of.hh:7
bool startsWith(string_view total, string_view part)
Definition: StringOp.cc:29
constexpr vecN< N, T > min(const vecN< N, T > &x, const vecN< N, T > &y)
Definition: gl_vec.hh:269
bool all_of(InputRange &&range, UnaryPredicate pred)
Definition: ranges.hh:148
size_t size(std::string_view utf8)
TemporaryString tmpStrCat(Ts &&... ts)
Definition: strCat.hh:659
constexpr auto xrange(T e)
Definition: xrange.hh:155