openMSX
rapidsax.hh
Go to the documentation of this file.
1 #ifndef RAPIDSAX_HH
2 #define RAPIDSAX_HH
3 
4 // This code is _heavily_ based on RapidXml 1.13
5 // http://rapidxml.sourceforge.net/
6 //
7 // RapidXml is a very fast XML parser.
8 // http://xmlbench.sourceforge.net/results/benchmark200910/index.html
9 // One of the main reasons it can be this fast is that it doesn't do any string
10 // copies. Instead the XML input data is modified in-place (e.g. for stuff like
11 // &lt; replacements). Though this also means the output produced by the parser
12 // is tied to the lifetime of the XML input data.
13 //
14 // RapidXml produces a DOM-like output. This parser has a SAX-like interface.
15 
16 #include "one_of.hh"
17 #include "small_compare.hh"
18 #include <cassert>
19 #include <cstdint>
20 #include <string_view>
21 
22 namespace rapidsax {
23 
24 // Parse given XML text and call callback functions in the given handler.
25 // - XML text must be zero-terminated
26 // - Handler must implement the methods defined in NullHandler (below). An
27 // easy way to do this is to inherit from NullHandler and only reimplement
28 // the methods that you need.
29 // - The behavior of the parser can be fine-tuned with the FLAGS parameter,
30 // see below for more details.
31 // - When a parse error is encountered, an instance of ParseError is thrown.
32 // - The lifetime of the string_view's in the callback handler is the same as
33 // the lifetime of the input XML data (no string copies are made, instead
34 // the XML file is modified in-place and references to this data are passed).
35 template<int FLAGS, typename HANDLER> void parse(HANDLER& handler, char* xml);
36 
// When loading an XML file from disk, the buffer must be EXTRA_BUFFER_SPACE
// bytes bigger than the file size. The first of these extra bytes must be
// zero (it zero-terminates the XML data). The other bytes are only there so
// that the parser may read up to 8 bytes past the end of the data without
// triggering memory protection errors.
// 'inline' gives this header-defined constant a single definition across all
// translation units that include this header.
inline constexpr size_t EXTRA_BUFFER_SPACE = 8;
42 
43 
// Flags that influence parsing behavior. Each flag is a separate bit, so the
// flags can be OR'ed together to form the FLAGS template argument of parse().
// They are 'inline' so this header defines each constant exactly once.

// When set, XML entities like &lt; are NOT expanded (they are passed verbatim).
inline constexpr int noEntityTranslation = 0x1;
// When set, leading and trailing whitespace of text is trimmed.
inline constexpr int trimWhitespace = 0x2;
// When set, sequences of whitespace characters in text are replaced with a
// single space character (attribute values are never normalized).
inline constexpr int normalizeWhitespace = 0x4;
// When set, parsed strings are zero-terminated by modifying the input buffer
// in-place.
inline constexpr int zeroTerminateStrings = 0x8;
55 
56 
// Callback handler with all empty implementations. It shows the set of
// methods a handler passed to parse() must offer, and it can be used as a
// base class in case you only need to reimplement a few of the methods.
class NullHandler
{
public:
    // Called when an opening XML tag is encountered.
    // 'name' is the name of the XML tag.
    void start(std::string_view /*name*/) {}

    // Called when a XML tag is closed.
    // Note: the parser does currently not check whether the names of the
    // opening and closing tags match.
    void stop() {}

    // Called when text inside a tag is parsed.
    // XML entities are replaced (optional).
    // Whitespace is (optionally) trimmed or normalized.
    // This method is not called for an empty text string.
    // (Unlike other SAX parsers) the whole text string is always
    // passed in a single chunk (so no need to concatenate this text
    // with previous chunks in the callback).
    void text(std::string_view /*text*/) {}

    // Called for each parsed attribute of an XML tag.
    // (Attributes of an XML declaration are reported via declAttribute().)
    void attribute(std::string_view /*name*/, std::string_view /*value*/) {}

    // Called for parsed CDATA sections.
    void cdata(std::string_view /*value*/) {}

    // Called when a XML comment (<!-- ... -->) is parsed.
    void comment(std::string_view /*value*/) {}

    // Called when a XML declaration (<?xml .. ?>) is parsed:
    // declarationStart(), then declAttribute() once per attribute inside
    // the declaration, then declarationStop().
    void declarationStart() {}
    void declAttribute(std::string_view /*name*/, std::string_view /*value*/) {}
    void declarationStop() {}

    // Called when the <!DOCTYPE ..> is parsed.
    void doctype(std::string_view /*text*/) {}

    // Called when XML processing instructions (<? .. ?>) are parsed.
    void procInstr(std::string_view /*target*/, std::string_view /*instr*/) {}
};
102 
103 
// Exception type thrown by the parser for malformed input.
// It stores two non-owning pointers: a description of the problem and a
// position (at the throw sites in this header: a pointer into the XML
// buffer that is being parsed).
class ParseError
{
public:
    ParseError(const char* what_, char* where_)
        : m_what(what_)
        , m_where(where_)
    {
    }

    // Description of the error, exactly the pointer passed to the constructor.
    [[nodiscard]] const char* what() const { return m_what; }
    // Position associated with the error, exactly the pointer passed to the
    // constructor.
    [[nodiscard]] char* where() const { return m_where; }

private:
    const char* m_what;
    char* m_where;
};
120 
121 
122 namespace internal {
123 
124 extern const uint8_t lutChar [256]; // Character class
125 extern const uint8_t lutDigits[256]; // Digits
126 
127 // Detect whitespace character (space \n \r \t)
129  [[nodiscard]] static bool test(char ch) { return (lutChar[uint8_t(ch)] & 0x02) != 0; }
130 };
131 
132 // Detect node name character (anything but space \n \r \t / > ? \0)
133 struct NodeNamePred {
134  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x43); }
135 };
136 
137 // Detect attribute name character (anything but space \n \r \t / < > = ? ! \0)
139  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0xC7); }
140 };
141 
142 // Detect text character (PCDATA) (anything but < \0)
143 struct TextPred {
144  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x05); }
145 };
146 
147 // Detect text character (PCDATA) that does not require processing when ws
148 // normalization is disabled (anything but < \0 &)
150  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x0D); }
151 };
152 
153 // Detect text character (PCDATA) that does not require processing when ws
154 // normalization is enabled (anything but < \0 & space \n \r \t)
156  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x0F); }
157 };
158 
159 // Detect attribute value character, single quote (anything but ' \0)
160 struct AttPred1 {
161  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x11); }
162 };
163 // Detect attribute value character, double quote (anything but " \0)
164 struct AttPred2 {
165  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x21); }
166 };
167 
168 // Detect attribute value character, single quote, that does not require
169 // processing (anything but ' \0 &)
170 struct AttPurePred1 {
171  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x19); }
172 };
173 // Detect attribute value character, double quote, that does not require
174 // processing (anything but " \0 &)
175 struct AttPurePred2 {
176  [[nodiscard]] static bool test(char ch) { return !(lutChar[uint8_t(ch)] & 0x29); }
177 };
178 
179 // Insert coded character, using UTF8
180 inline void insertUTF8char(char*& text, uint32_t code)
181 {
182  if (code < 0x80) { // 1 byte sequence
183  text[0] = char(code);
184  text += 1;
185  } else if (code < 0x800) {// 2 byte sequence
186  text[1] = char((code | 0x80) & 0xBF); code >>= 6;
187  text[0] = char (code | 0xC0);
188  text += 2;
189  } else if (code < 0x10000) { // 3 byte sequence
190  text[2] = char((code | 0x80) & 0xBF); code >>= 6;
191  text[1] = char((code | 0x80) & 0xBF); code >>= 6;
192  text[0] = char (code | 0xE0);
193  text += 3;
194  } else if (code < 0x110000) { // 4 byte sequence
195  text[3] = char((code | 0x80) & 0xBF); code >>= 6;
196  text[2] = char((code | 0x80) & 0xBF); code >>= 6;
197  text[1] = char((code | 0x80) & 0xBF); code >>= 6;
198  text[0] = char (code | 0xF0);
199  text += 4;
200  } else { // Invalid, only codes up to 0x10FFFF are allowed in Unicode
201  throw ParseError("invalid numeric character entity", text);
202  }
203 }
204 
205 template<char C0, char C1> [[nodiscard]] static inline bool next(const char* p)
206 {
207  return small_compare<C0, C1>(p);
208 }
209 template<char C0, char C1, char C2> [[nodiscard]] static inline bool next(const char* p)
210 {
211  return small_compare<C0, C1, C2>(p);
212 }
213 template<char C0, char C1, char C2, char C3> [[nodiscard]] static inline bool next(const char* p)
214 {
215  return small_compare<C0, C1, C2, C3>(p);
216 }
217 template<char C0, char C1, char C2, char C3, char C4, char C5>
218 [[nodiscard]] static inline bool next(const char* p)
219 {
220  return small_compare<C0, C1, C2, C3, C4, C5>(p);
221 }
222 
223 
// Advance 'text' over the leading run of characters for which
// StopPred::test() returns true. Afterwards 'text' points at the first
// character that the predicate rejects. The predicate must reject the
// terminating '\0', otherwise the scan runs past the end of the data.
template<typename StopPred> static inline void skip(char*& text)
{
    char* cursor = text;
    for (; StopPred::test(*cursor); ++cursor) {
        // nothing to do, just advance
    }
    text = cursor;
}
231 
232 // Skip characters until predicate evaluates to true while doing the following:
233 // - replacing XML character entity references with proper characters
234 // (&apos; &amp; &quot; &lt; &gt; &#...;)
235 // - condensing whitespace sequences to single space character
236 template<typename StopPred, class StopPredPure, int FLAGS>
237 [[nodiscard]] static inline char* skipAndExpand(char*& text)
238 {
239  // If entity translation, whitespace condense and whitespace
240  // trimming is disabled, use plain skip.
241  if constexpr ( (FLAGS & noEntityTranslation) &&
242  !(FLAGS & normalizeWhitespace) &&
243  !(FLAGS & trimWhitespace)) {
244  skip<StopPred>(text);
245  return text;
246  }
247 
248  // Use simple skip until first modification is detected
249  skip<StopPredPure>(text);
250 
251  // Use translation skip
252  char* src = text;
253  char* dest = src;
254  while (StopPred::test(*src)) {
255  // Test if replacement is needed
256  if (!(FLAGS & noEntityTranslation) &&
257  (src[0] == '&')) {
258  switch (src[1]) {
259  case 'a': // &amp; &apos;
260  if (next<'m','p',';'>(&src[2])) {
261  *dest = '&';
262  ++dest;
263  src += 5;
264  continue;
265  }
266  if (next<'p','o','s',';'>(&src[2])) {
267  *dest = '\'';
268  ++dest;
269  src += 6;
270  continue;
271  }
272  break;
273 
274  case 'q': // &quot;
275  if (next<'u','o','t',';'>(&src[2])) {
276  *dest = '"';
277  ++dest;
278  src += 6;
279  continue;
280  }
281  break;
282 
283  case 'g': // &gt;
284  if (next<'t',';'>(&src[2])) {
285  *dest = '>';
286  ++dest;
287  src += 4;
288  continue;
289  }
290  break;
291 
292  case 'l': // &lt;
293  if (next<'t',';'>(&src[2])) {
294  *dest = '<';
295  ++dest;
296  src += 4;
297  continue;
298  }
299  break;
300 
301  case '#': // &#...; - assumes ASCII
302  if (src[2] == 'x') {
303  uint32_t code = 0;
304  src += 3; // skip &#x
305  while (true) {
306  uint8_t digit = lutDigits[uint8_t(*src)];
307  if (digit == 0xFF) break;
308  code = code * 16 + digit;
309  ++src;
310  }
311  insertUTF8char(dest, code);
312  } else {
313  uint32_t code = 0;
314  src += 2; // skip &#
315  while (true) {
316  uint8_t digit = lutDigits[uint8_t(*src)];
317  if (digit == 0xFF) break;
318  code = code * 10 + digit;
319  ++src;
320  }
321  insertUTF8char(dest, code);
322  }
323  if (*src != ';') {
324  throw ParseError("expected ;", src);
325  }
326  ++src;
327  continue;
328 
329  default:
330  // Something else, ignore, just copy '&' verbatim
331  break;
332  }
333  }
334 
335  // Test if condensing is needed
336  if ((FLAGS & normalizeWhitespace) &&
337  (WhitespacePred::test(*src))) {
338  *dest++ = ' '; // single space in dest
339  ++src; // skip first whitespace char
340  // Skip remaining whitespace chars
341  while (WhitespacePred::test(*src)) ++src;
342  continue;
343  }
344 
345  // No replacement, only copy character
346  *dest++ = *src++;
347  }
348 
349  // Return new end
350  text = src;
351  return dest;
352 }
353 
354 inline void skipBOM(char*& text)
355 {
356  if (next<char(0xEF), char(0xBB), char(0xBF)>(text)) {
357  text += 3; // skip utf-8 bom
358  }
359 }
360 
361 
362 template<int FLAGS, typename HANDLER> class Parser
363 {
364  HANDLER& handler;
365 
366 public:
367  Parser(HANDLER& handler_, char* text)
368  : handler(handler_)
369  {
370  skipBOM(text);
371  while (true) {
372  // Skip whitespace before node
373  skip<WhitespacePred>(text);
374  if (*text == 0) break;
375 
376  if (*text != '<') {
377  throw ParseError("expected <", text);
378  }
379  ++text; // skip '<'
380  parseNode(text);
381  }
382  }
383 
384 private:
385  // Parse XML declaration (<?xml...)
386  void parseDeclaration(char*& text)
387  {
388  handler.declarationStart();
389  skip<WhitespacePred>(text); // skip ws before attributes or ?>
390  parseAttributes(text, true);
391  handler.declarationStop();
392 
393  // skip ?>
394  if (!next<'?','>'>(text)) {
395  throw ParseError("expected ?>", text);
396  }
397  text += 2;
398  }
399 
400  // Parse XML comment (<!--...)
401  void parseComment(char*& text)
402  {
403  // Skip until end of comment
404  char* value = text; // remember value start
405  while (!next<'-','-','>'>(text)) {
406  if (text[0] == 0) {
407  throw ParseError("unexpected end of data", text);
408  }
409  ++text;
410  }
411  if (FLAGS & zeroTerminateStrings) {
412  *text = '\0';
413  }
414  handler.comment(std::string_view(value, text - value));
415  text += 3; // skip '-->'
416  }
417 
418  void parseDoctype(char*& text)
419  {
420  char* value = text; // remember value start
421 
422  // skip to >
423  while (*text != '>') {
424  switch (*text) {
425  case '[': {
426  // If '[' encountered, scan for matching ending
427  // ']' using naive algorithm with depth. This
428  // works for all W3C test files except for 2
429  // most wicked.
430  ++text; // skip '['
431  int depth = 1;
432  while (depth > 0) {
433  switch (*text) {
434  case char('['): ++depth; break;
435  case char(']'): --depth; break;
436  case 0: throw ParseError(
437  "unexpected end of data", text);
438  }
439  ++text;
440  }
441  break;
442  }
443  case '\0':
444  throw ParseError("unexpected end of data", text);
445 
446  default:
447  ++text;
448  }
449  }
450 
451  if (FLAGS & zeroTerminateStrings) {
452  *text = '\0';
453  }
454  handler.doctype(std::string_view(value, text - value));
455  text += 1; // skip '>'
456  }
457 
458  void parsePI(char*& text)
459  {
460  // Extract PI target name
461  char* name = text;
462  skip<NodeNamePred>(text);
463  char* nameEnd = text;
464  if (name == nameEnd) {
465  throw ParseError("expected PI target", text);
466  }
467 
468  // Skip whitespace between pi target and pi
469  skip<WhitespacePred>(text);
470 
471  // Skip to '?>'
472  char* value = text; // Remember start of pi
473  while (!next<'?','>'>(text)) {
474  if (*text == 0) {
475  throw ParseError("unexpected end of data", text);
476  }
477  ++text;
478  }
479  // Set pi value (verbatim, no entity expansion or ws normalization)
480  if (FLAGS & zeroTerminateStrings) {
481  *nameEnd = '\0';
482  *text = '\0';
483  }
484  handler.procInstr(std::string_view(name, nameEnd - name),
485  std::string_view(value, text - value));
486  text += 2; // skip '?>'
487  }
488 
489  void parseText(char*& text, char* contentsStart)
490  {
491  // Backup to contents start if whitespace trimming is disabled
492  if constexpr (!(FLAGS & trimWhitespace)) {
493  text = contentsStart;
494  }
495  // Skip until end of data
496  char* value = text;
497  char* end = (FLAGS & normalizeWhitespace)
498  ? skipAndExpand<TextPred, TextPureWithWsPred, FLAGS>(text)
499  : skipAndExpand<TextPred, TextPureNoWsPred , FLAGS>(text);
500 
501  // Trim trailing whitespace; leading was already trimmed by
502  // whitespace skip after >
503  if constexpr ((FLAGS & trimWhitespace) != 0) {
504  if constexpr (FLAGS & normalizeWhitespace) {
505  // Whitespace is already condensed to single
506  // space characters by skipping function, so
507  // just trim 1 char off the end.
508  if (end[-1] == ' ') {
509  --end;
510  }
511  } else {
512  // Backup until non-whitespace character is found
513  while (WhitespacePred::test(end[-1])) {
514  --end;
515  }
516  }
517  }
518 
519  // check next char before calling handler.text()
520  if (*text == '\0') {
521  throw ParseError("unexpected end of data", text);
522  } else {
523  assert(*text == '<');
524  }
525 
526  // Handle text, but only if non-empty.
527  auto len = end - value;
528  if (len) {
529  if (FLAGS & zeroTerminateStrings) {
530  *text = '\0';
531  }
532  handler.text(std::string_view(value, len));
533  }
534  }
535 
536  void parseCdata(char*& text)
537  {
538  // Skip until end of cdata
539  char* value = text;
540  while (!next<']',']','>'>(text)) {
541  if (text[0] == 0) {
542  throw ParseError("unexpected end of data", text);
543  }
544  ++text;
545  }
546  if (FLAGS & zeroTerminateStrings) {
547  *text = '\0';
548  }
549  handler.cdata(std::string_view(value, text - value));
550  text += 3; // skip ]]>
551  }
552 
553  void parseElement(char*& text)
554  {
555  // Extract element name
556  char* name = text;
557  skip<NodeNamePred>(text);
558  char* nameEnd = text;
559  if (name == nameEnd) {
560  throw ParseError("expected element name", text);
561  }
562  handler.start(std::string_view(name, nameEnd - name));
563 
564  skip<WhitespacePred>(text); // skip ws before attributes or >
565  parseAttributes(text, false);
566 
567  // Determine ending type
568  if (*text == '>') {
569  if (FLAGS & zeroTerminateStrings) {
570  *nameEnd = '\0';
571  }
572  ++text;
573  parseNodeContents(text);
574  } else if (*text == '/') {
575  if (FLAGS & zeroTerminateStrings) {
576  *nameEnd = '\0';
577  }
578  handler.stop();
579  ++text;
580  if (*text != '>') {
581  throw ParseError("expected >", text);
582  }
583  ++text;
584  } else {
585  throw ParseError("expected >", text);
586  }
587  }
588 
589  // Determine node type, and parse it
590  void parseNode(char*& text)
591  {
592  switch (text[0]) {
593  case '?': // <?...
594  ++text; // skip ?
595  // Note: this doesn't detect mixed case (xMl), does
596  // that matter?
597  if ((next<'x','m','l'>(text) ||
598  next<'X','M','L'>(text)) &&
599  WhitespacePred::test(text[3])) {
600  // '<?xml ' - xml declaration
601  text += 4; // skip 'xml '
602  parseDeclaration(text);
603  } else {
604  parsePI(text);
605  }
606  break;
607 
608  case '!': // <!...
609  // Parse proper subset of <! node
610  switch (text[1]) {
611  case '-': // <!-
612  if (text[2] == '-') {
613  // '<!--' - xml comment
614  text += 3; // skip '!--'
615  parseComment(text);
616  return;
617  }
618  break;
619 
620  case '[': // <![
621  if (next<'C','D','A','T','A','['>(&text[2])) {
622  // '<![CDATA[' - cdata
623  text += 8; // skip '![CDATA['
624  parseCdata(text);
625  return;
626  }
627  break;
628 
629  case 'D': // <!D
630  if (next<'O','C','T','Y','P','E'>(&text[2]) &&
631  WhitespacePred::test(text[8])) {
632  // '<!DOCTYPE ' - doctype
633  text += 9; // skip '!DOCTYPE '
634  parseDoctype(text);
635  return;
636  }
637  break;
638  }
639  // Attempt to skip other, unrecognized types starting with <!
640  ++text; // skip !
641  while (*text != '>') {
642  if (*text == 0) {
643  throw ParseError(
644  "unexpected end of data", text);
645  }
646  ++text;
647  }
648  ++text; // skip '>'
649  break;
650 
651  default: // <...
652  parseElement(text);
653  break;
654  }
655  }
656 
657  // Parse contents of the node - children, data etc.
658  void parseNodeContents(char*& text)
659  {
660  while (true) {
661  char* contentsStart = text; // start before ws is skipped
662  skip<WhitespacePred>(text); // Skip ws between > and contents
663 
664  switch (*text) {
665  case '<': // Node closing or child node
666 afterText: // After parseText() jump here instead of continuing
667  // the loop, because skipping whitespace is unnecessary.
668  if (text[1] == '/') {
669  // Node closing
670  text += 2; // skip '</'
671  skip<NodeNamePred>(text);
672  // TODO validate closing tag??
673  handler.stop();
674  // Skip remaining whitespace after node name
675  skip<WhitespacePred>(text);
676  if (*text != '>') {
677  throw ParseError("expected >", text);
678  }
679  ++text; // skip '>'
680  return;
681  } else {
682  // Child node
683  ++text; // skip '<'
684  parseNode(text);
685  }
686  break;
687 
688  case '\0':
689  throw ParseError("unexpected end of data", text);
690 
691  default:
692  parseText(text, contentsStart);
693  goto afterText;
694  }
695  }
696  }
697 
698  // Parse XML attributes of the node
699  void parseAttributes(char*& text, bool declaration)
700  {
701  // For all attributes
702  while (AttributeNamePred::test(*text)) {
703  // Extract attribute name
704  char* name = text;
705  ++text; // Skip first character of attribute name
706  skip<AttributeNamePred>(text);
707  char* nameEnd = text;
708  if (name == nameEnd) {
709  throw ParseError("expected attribute name", name);
710  }
711 
712  skip<WhitespacePred>(text); // skip ws after name
713  if (*text != '=') {
714  throw ParseError("expected =", text);
715  }
716  ++text; // skip =
717  skip<WhitespacePred>(text); // skip ws after =
718 
719  // Skip quote and remember if it was ' or "
720  char quote = *text;
721  if (quote != one_of('\'', '"')) {
722  throw ParseError("expected ' or \"", text);
723  }
724  ++text;
725 
726  // Extract attribute value and expand char refs in it
727  // No whitespace normalization in attributes
728  constexpr int FLAGS2 = FLAGS & ~normalizeWhitespace;
729  char* value = text;
730  char* valueEnd = (quote == '\'')
731  ? skipAndExpand<AttPred1, AttPurePred1, FLAGS2>(text)
732  : skipAndExpand<AttPred2, AttPurePred2, FLAGS2>(text);
733  // Make sure that end quote is present
734  // check before calling handler.xxx()
735  if (*text != quote) {
736  throw ParseError("expected ' or \"", text);
737  }
738  ++text; // skip quote
739 
740  if (FLAGS & zeroTerminateStrings) {
741  *nameEnd = '\0';
742  *valueEnd = '\0';
743  }
744  if (!declaration) {
745  handler.attribute(std::string_view(name, nameEnd - name),
746  std::string_view(value, valueEnd - value));
747  } else {
748  handler.declAttribute(std::string_view(name, nameEnd - name),
749  std::string_view(value, valueEnd - value));
750  }
751 
752  skip<WhitespacePred>(text); // skip ws after value
753  }
754  }
755 };
756 
757 } // namespace internal
758 
759 template<int FLAGS, typename HANDLER>
760 inline void parse(HANDLER& handler, char* xml)
761 {
762  internal::Parser<FLAGS, HANDLER> parser(handler, xml);
763 }
764 
765 } // namespace rapidsax
766 
767 #endif
TclObject t
Definition: one_of.hh:7
void declAttribute(std::string_view, std::string_view)
Definition: rapidsax.hh:93
void text(std::string_view)
Definition: rapidsax.hh:78
void comment(std::string_view)
Definition: rapidsax.hh:88
void procInstr(std::string_view, std::string_view)
Definition: rapidsax.hh:100
void attribute(std::string_view, std::string_view)
Definition: rapidsax.hh:82
void doctype(std::string_view)
Definition: rapidsax.hh:97
void cdata(std::string_view)
Definition: rapidsax.hh:85
void start(std::string_view)
Definition: rapidsax.hh:64
char * where() const
Definition: rapidsax.hh:114
const char * what() const
Definition: rapidsax.hh:113
ParseError(const char *what_, char *where_)
Definition: rapidsax.hh:107
Parser(HANDLER &handler_, char *text)
Definition: rapidsax.hh:367
void insertUTF8char(char *&text, uint32_t code)
Definition: rapidsax.hh:180
const uint8_t lutDigits[256]
Definition: rapidsax.cc:36
void skipBOM(char *&text)
Definition: rapidsax.hh:354
const uint8_t lutChar[256]
Definition: rapidsax.cc:14
constexpr int noEntityTranslation
Definition: rapidsax.hh:47
constexpr int zeroTerminateStrings
Definition: rapidsax.hh:54
constexpr int trimWhitespace
Definition: rapidsax.hh:49
void parse(HANDLER &handler, char *xml)
Definition: rapidsax.hh:760
constexpr int normalizeWhitespace
Definition: rapidsax.hh:52
constexpr size_t EXTRA_BUFFER_SPACE
Definition: rapidsax.hh:41
static bool test(char ch)
Definition: rapidsax.hh:161
static bool test(char ch)
Definition: rapidsax.hh:165
static bool test(char ch)
Definition: rapidsax.hh:171
static bool test(char ch)
Definition: rapidsax.hh:176
static bool test(char ch)
Definition: rapidsax.hh:134
static bool test(char ch)
Definition: rapidsax.hh:144
static bool test(char ch)
Definition: rapidsax.hh:129