openMSX
AdhocCliCommParser.cc
Go to the documentation of this file.
1 #include "AdhocCliCommParser.hh"
2 #include "utf8_unchecked.hh"
3 #include "xrange.hh"
4 
5 
6 AdhocCliCommParser::AdhocCliCommParser(std::function<void(const std::string&)> callback_)
7  : callback(std::move(callback_))
8  , state(O0)
9 {
10 }
11 
12 void AdhocCliCommParser::parse(const char* buf, size_t n)
13 {
14  for (auto i : xrange(n)) {
15  parse(buf[i]);
16  }
17 }
18 
19 void AdhocCliCommParser::parse(char c)
20 {
21  // Whenever there is a parse error we return to the initial state
22  switch (state) {
23  case O0: // looking for opening tag
24  state = (c == '<') ? O1 : O0; break;
25  case O1: // matched <
26  state = (c == 'c') ? O2 : O0; break;
27  case O2: // matched <c
28  state = (c == 'o') ? O3 : O0; break;
29  case O3: // matched <co
30  state = (c == 'm') ? O4 : O0; break;
31  case O4: // matched <com
32  state = (c == 'm') ? O5 : O0; break;
33  case O5: // matched <comm
34  state = (c == 'a') ? O6 : O0; break;
35  case O6: // matched <comma
36  state = (c == 'n') ? O7 : O0; break;
37  case O7: // matched <comman
38  state = (c == 'd') ? O8 : O0; break;
39  case O8: // matched <command
40  if (c == '>') {
41  state = C0;
42  command.clear();
43  } else {
44  state = O0;
45  }
46  break;
47  case C0: // matched <command>, now parsing xml entities and </command>
48  if (c == '<') state = C1;
49  else if (c == '&') state = A1;
50  else command += c;
51  break;
52  case C1: // matched <
53  state = (c == '/') ? C2 : O0; break;
54  case C2: // matched </
55  state = (c == 'c') ? C3 : O0; break;
56  case C3: // matched </c
57  state = (c == 'o') ? C4 : O0; break;
58  case C4: // matched </co
59  state = (c == 'm') ? C5 : O0; break;
60  case C5: // matched </com
61  state = (c == 'm') ? C6 : O0; break;
62  case C6: // matched </comm
63  state = (c == 'a') ? C7 : O0; break;
64  case C7: // matched </comma
65  state = (c == 'n') ? C8 : O0; break;
66  case C8: // matched </comman
67  state = (c == 'd') ? C9 : O0; break;
68  case C9: // matched </command
69  if (c == '>') callback(command);
70  state = O0;
71  break;
72  case A1: // matched &
73  if (c == 'l') state = L2;
74  else if (c == 'a') state = A2;
75  else if (c == 'g') state = G2;
76  else if (c == 'q') state = Q2;
77  else if (c == '#') { state = H2; unicode = 0; }
78  else state = O0; // error
79  break;
80  case A2: // matched &a
81  if (c == 'm') state = A3;
82  else if (c == 'p') state = P3;
83  else state = O0; // error
84  break;
85  case A3: // matched &am
86  state = (c == 'p') ? A4 : O0; break;
87  case A4: // matched &amp
88  if (c == ';') {
89  command += '&';
90  state = C0;
91  } else {
92  state = O0; // error
93  }
94  break;
95  case P3: // matched &ap
96  state = (c == 'o') ? P4 : O0; break;
97  case P4: // matched &apo
98  state = (c == 's') ? P5 : O0; break;
99  case P5: // matched &apos
100  if (c == ';') {
101  command += '\'';
102  state = C0;
103  } else {
104  state = O0; // error
105  }
106  break;
107  case Q2: // matched &q
108  state = (c == 'u') ? Q3 : O0; break;
109  case Q3: // matched &qu
110  state = (c == 'o') ? Q4 : O0; break;
111  case Q4: // matched &quo
112  state = (c == 't') ? Q5 : O0; break;
113  case Q5: // matched &quot
114  if (c == ';') {
115  command += '"';
116  state = C0;
117  } else {
118  state = O0; // error
119  }
120  break;
121  case G2: // matched &g
122  state = (c == 't') ? G3 : O0; break;
123  case G3: // matched &gt
124  if (c == ';') {
125  command += '>';
126  state = C0;
127  } else {
128  state = O0; // error
129  }
130  break;
131  case L2: // matched &l
132  state = (c == 't') ? L3 : O0; break;
133  case L3: // matched &lt
134  if (c == ';') {
135  command += '<';
136  state = C0;
137  } else {
138  state = O0; // error
139  }
140  break;
141  case H2: // matched &#
142  // This also parses invalid input like '&#12xab;' but let's
143  // ignore that. It also doesn't check for overflow etc.
144  if (c == ';') {
145  utf8::unchecked::append(unicode, back_inserter(command));
146  state = C0;
147  } else if (c == 'x') {
148  state = H3;
149  } else {
150  unicode *= 10;
151  if (('0' <= c) && (c <= '9')) unicode += c - '0';
152  else state = O0;
153  }
154  break;
155  case H3: // matched &#x
156  if (c == ';') {
157  utf8::unchecked::append(unicode, back_inserter(command));
158  state = C0;
159  } else {
160  unicode *= 16;
161  if (('0' <= c) && (c <= '9')) unicode += c - '0';
162  else if (('a' <= c) && (c <= 'f')) unicode += c - 'a' + 10;
163  else if (('A' <= c) && (c <= 'F')) unicode += c - 'A' + 10;
164  else state = O0;
165  }
166  break;
167  }
168 }
void parse(const char *buf, size_t n)
AdhocCliCommParser(std::function< void(const std::string &)> callback)
octet_iterator append(uint32_t cp, octet_iterator result)
constexpr auto xrange(T e)
Definition: xrange.hh:155