openMSX
AdhocCliCommParser.cc
Go to the documentation of this file.
1 #include "AdhocCliCommParser.hh"
2 #include "utf8_unchecked.hh"
3 
4 
5 AdhocCliCommParser::AdhocCliCommParser(std::function<void(const std::string&)> callback_)
6  : callback(std::move(callback_))
7  , state(O0)
8 {
9 }
10 
11 void AdhocCliCommParser::parse(const char* buf, size_t n)
12 {
13  for (size_t i = 0; i < n; ++i) parse(buf[i]);
14 }
15 
16 void AdhocCliCommParser::parse(char c)
17 {
18  // Whenever there is a parse error we return to the initial state
19  switch (state) {
20  case O0: // looking for opening tag
21  state = (c == '<') ? O1 : O0; break;
22  case O1: // matched <
23  state = (c == 'c') ? O2 : O0; break;
24  case O2: // matched <c
25  state = (c == 'o') ? O3 : O0; break;
26  case O3: // matched <co
27  state = (c == 'm') ? O4 : O0; break;
28  case O4: // matched <com
29  state = (c == 'm') ? O5 : O0; break;
30  case O5: // matched <comm
31  state = (c == 'a') ? O6 : O0; break;
32  case O6: // matched <comma
33  state = (c == 'n') ? O7 : O0; break;
34  case O7: // matched <comman
35  state = (c == 'd') ? O8 : O0; break;
36  case O8: // matched <command
37  if (c == '>') {
38  state = C0;
39  command.clear();
40  } else {
41  state = O0;
42  }
43  break;
44  case C0: // matched <command>, now parsing xml entities and </command>
45  if (c == '<') state = C1;
46  else if (c == '&') state = A1;
47  else command += c;
48  break;
49  case C1: // matched <
50  state = (c == '/') ? C2 : O0; break;
51  case C2: // matched </
52  state = (c == 'c') ? C3 : O0; break;
53  case C3: // matched </c
54  state = (c == 'o') ? C4 : O0; break;
55  case C4: // matched </co
56  state = (c == 'm') ? C5 : O0; break;
57  case C5: // matched </com
58  state = (c == 'm') ? C6 : O0; break;
59  case C6: // matched </comm
60  state = (c == 'a') ? C7 : O0; break;
61  case C7: // matched </comma
62  state = (c == 'n') ? C8 : O0; break;
63  case C8: // matched </comman
64  state = (c == 'd') ? C9 : O0; break;
65  case C9: // matched </command
66  if (c == '>') callback(command);
67  state = O0;
68  break;
69  case A1: // matched &
70  if (c == 'l') state = L2;
71  else if (c == 'a') state = A2;
72  else if (c == 'g') state = G2;
73  else if (c == 'q') state = Q2;
74  else if (c == '#') { state = H2; unicode = 0; }
75  else state = O0; // error
76  break;
77  case A2: // matched &a
78  if (c == 'm') state = A3;
79  else if (c == 'p') state = P3;
80  else state = O0; // error
81  break;
82  case A3: // matched &am
83  state = (c == 'p') ? A4 : O0; break;
84  case A4: // matched &amp
85  if (c == ';') {
86  command += '&';
87  state = C0;
88  } else {
89  state = O0; // error
90  }
91  break;
92  case P3: // matched &ap
93  state = (c == 'o') ? P4 : O0; break;
94  case P4: // matched &apo
95  state = (c == 's') ? P5 : O0; break;
96  case P5: // matched &apos
97  if (c == ';') {
98  command += '\'';
99  state = C0;
100  } else {
101  state = O0; // error
102  }
103  break;
104  case Q2: // matched &q
105  state = (c == 'u') ? Q3 : O0; break;
106  case Q3: // matched &qu
107  state = (c == 'o') ? Q4 : O0; break;
108  case Q4: // matched &quo
109  state = (c == 't') ? Q5 : O0; break;
110  case Q5: // matched &quot
111  if (c == ';') {
112  command += '"';
113  state = C0;
114  } else {
115  state = O0; // error
116  }
117  break;
118  case G2: // matched &g
119  state = (c == 't') ? G3 : O0; break;
120  case G3: // matched &gt
121  if (c == ';') {
122  command += '>';
123  state = C0;
124  } else {
125  state = O0; // error
126  }
127  break;
128  case L2: // matched &l
129  state = (c == 't') ? L3 : O0; break;
130  case L3: // matched &lt
131  if (c == ';') {
132  command += '<';
133  state = C0;
134  } else {
135  state = O0; // error
136  }
137  break;
138  case H2: // matched &#
139  // This also parses invalid input like '&#12xab;' but let's
140  // ignore that. It also doesn't check for overflow etc.
141  if (c == ';') {
142  utf8::unchecked::append(unicode, back_inserter(command));
143  state = C0;
144  } else if (c == 'x') {
145  state = H3;
146  } else {
147  unicode *= 10;
148  if (('0' <= c) && (c <= '9')) unicode += c - '0';
149  else state = O0;
150  }
151  break;
152  case H3: // matched &#x
153  if (c == ';') {
154  utf8::unchecked::append(unicode, back_inserter(command));
155  state = C0;
156  } else {
157  unicode *= 16;
158  if (('0' <= c) && (c <= '9')) unicode += c - '0';
159  else if (('a' <= c) && (c <= 'f')) unicode += c - 'a' + 10;
160  else if (('A' <= c) && (c <= 'F')) unicode += c - 'A' + 10;
161  else state = O0;
162  }
163  break;
164  }
165 }
AdhocCliCommParser::AdhocCliCommParser
AdhocCliCommParser(std::function< void(const std::string &)> callback)
Definition: AdhocCliCommParser.cc:5
utf8_unchecked.hh
utf8::unchecked::append
octet_iterator append(uint32_t cp, octet_iterator result)
Definition: utf8_unchecked.hh:39
AdhocCliCommParser::parse
void parse(const char *buf, size_t n)
Definition: AdhocCliCommParser.cc:11
AdhocCliCommParser.hh