openMSX
AdhocCliCommParser.cc
Go to the documentation of this file.
2#include "utf8_unchecked.hh"
3#include "xrange.hh"
4
5
6AdhocCliCommParser::AdhocCliCommParser(std::function<void(const std::string&)> callback_)
7 : callback(std::move(callback_))
8 , state(O0)
9{
10}
11
12void AdhocCliCommParser::parse(const char* buf, size_t n)
13{
14 for (auto i : xrange(n)) {
15 parse(buf[i]);
16 }
17}
18
20{
21 // Whenever there is a parse error we return to the initial state
22 switch (state) {
23 case O0: // looking for opening tag
24 state = (c == '<') ? O1 : O0; break;
25 case O1: // matched <
26 state = (c == 'c') ? O2 : O0; break;
27 case O2: // matched <c
28 state = (c == 'o') ? O3 : O0; break;
29 case O3: // matched <co
30 state = (c == 'm') ? O4 : O0; break;
31 case O4: // matched <com
32 state = (c == 'm') ? O5 : O0; break;
33 case O5: // matched <comm
34 state = (c == 'a') ? O6 : O0; break;
35 case O6: // matched <comma
36 state = (c == 'n') ? O7 : O0; break;
37 case O7: // matched <comman
38 state = (c == 'd') ? O8 : O0; break;
39 case O8: // matched <command
40 if (c == '>') {
41 state = C0;
42 command.clear();
43 } else {
44 state = O0;
45 }
46 break;
47 case C0: // matched <command>, now parsing xml entities and </command>
48 if (c == '<') state = C1;
49 else if (c == '&') state = A1;
50 else command += c;
51 break;
52 case C1: // matched <
53 state = (c == '/') ? C2 : O0; break;
54 case C2: // matched </
55 state = (c == 'c') ? C3 : O0; break;
56 case C3: // matched </c
57 state = (c == 'o') ? C4 : O0; break;
58 case C4: // matched </co
59 state = (c == 'm') ? C5 : O0; break;
60 case C5: // matched </com
61 state = (c == 'm') ? C6 : O0; break;
62 case C6: // matched </comm
63 state = (c == 'a') ? C7 : O0; break;
64 case C7: // matched </comma
65 state = (c == 'n') ? C8 : O0; break;
66 case C8: // matched </comman
67 state = (c == 'd') ? C9 : O0; break;
68 case C9: // matched </command
69 if (c == '>') callback(command);
70 state = O0;
71 break;
72 case A1: // matched &
73 if (c == 'l') state = L2;
74 else if (c == 'a') state = A2;
75 else if (c == 'g') state = G2;
76 else if (c == 'q') state = Q2;
77 else if (c == '#') { state = H2; unicode = 0; }
78 else state = O0; // error
79 break;
80 case A2: // matched &a
81 if (c == 'm') state = A3;
82 else if (c == 'p') state = P3;
83 else state = O0; // error
84 break;
85 case A3: // matched &am
86 state = (c == 'p') ? A4 : O0; break;
87 case A4: // matched &amp
88 if (c == ';') {
89 command += '&';
90 state = C0;
91 } else {
92 state = O0; // error
93 }
94 break;
95 case P3: // matched &ap
96 state = (c == 'o') ? P4 : O0; break;
97 case P4: // matched &apo
98 state = (c == 's') ? P5 : O0; break;
99 case P5: // matched &apos
100 if (c == ';') {
101 command += '\'';
102 state = C0;
103 } else {
104 state = O0; // error
105 }
106 break;
107 case Q2: // matched &q
108 state = (c == 'u') ? Q3 : O0; break;
109 case Q3: // matched &qu
110 state = (c == 'o') ? Q4 : O0; break;
111 case Q4: // matched &quo
112 state = (c == 't') ? Q5 : O0; break;
113 case Q5: // matched &quot
114 if (c == ';') {
115 command += '"';
116 state = C0;
117 } else {
118 state = O0; // error
119 }
120 break;
121 case G2: // matched &g
122 state = (c == 't') ? G3 : O0; break;
123 case G3: // matched &gt
124 if (c == ';') {
125 command += '>';
126 state = C0;
127 } else {
128 state = O0; // error
129 }
130 break;
131 case L2: // matched &l
132 state = (c == 't') ? L3 : O0; break;
133 case L3: // matched &lt
134 if (c == ';') {
135 command += '<';
136 state = C0;
137 } else {
138 state = O0; // error
139 }
140 break;
141 case H2: // matched &#
142 // This also parses invalid input like '&#12xab;' but let's
143 // ignore that. It also doesn't check for overflow etc.
144 if (c == ';') {
145 utf8::unchecked::append(unicode, back_inserter(command));
146 state = C0;
147 } else if (c == 'x') {
148 state = H3;
149 } else {
150 unicode *= 10;
151 if (('0' <= c) && (c <= '9')) unicode += c - '0';
152 else state = O0;
153 }
154 break;
155 case H3: // matched &#x
156 if (c == ';') {
157 utf8::unchecked::append(unicode, back_inserter(command));
158 state = C0;
159 } else {
160 unicode *= 16;
161 if (('0' <= c) && (c <= '9')) unicode += c - '0';
162 else if (('a' <= c) && (c <= 'f')) unicode += c - 'a' + 10;
163 else if (('A' <= c) && (c <= 'F')) unicode += c - 'A' + 10;
164 else state = O0;
165 }
166 break;
167 }
168}
void parse(const char *buf, size_t n)
AdhocCliCommParser(std::function< void(const std::string &)> callback)
STL namespace.
octet_iterator append(uint32_t cp, octet_iterator result)
constexpr auto xrange(T e)
Definition: xrange.hh:133