undead.xml source code

1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will remain until we have a suitable replacement,
6       but be aware that it will not remain long term.)
7 
8 Classes and functions for creating and parsing XML
9 
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
17 
18 Example: This example creates a DOM (Document Object Model) tree
19     from an XML file.
20 ------------------------------------------------------------------------------
21 import undead.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
25 
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30 
31 void main()
32 {
33     string s = cast(string) std.file.read("books.xml");
34 
35     // Check for well-formedness
36     check(s);
37 
38     // Make a DOM tree
39     auto doc = new Document(s);
40 
41     // Plain-print it
42     writeln(doc);
43 }
44 ------------------------------------------------------------------------------
45 
46 Example: This example does much the same thing, except that the file is
47     deconstructed and reconstructed by hand. This is more work, but the
48     techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
50 import undead.xml;
51 import std.stdio;
52 import std.string;
53 
54 struct Book
55 {
56     string id;
57     string author;
58     string title;
59     string genre;
60     string price;
61     string pubDate;
62     string description;
63 }
64 
65 void main()
66 {
67     string s = cast(string) std.file.read("books.xml");
68 
69     // Check for well-formedness
70     check(s);
71 
72     // Take it apart
73     Book[] books;
74 
75     auto xml = new DocumentParser(s);
76     xml.onStartTag["book"] = (ElementParser xml)
77     {
78         Book book;
79         book.id = xml.tag.attr["id"];
80 
81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
87 
88         xml.parse();
89 
90         books ~= book;
91     };
92     xml.parse();
93 
94     // Put it back together again
95     auto doc = new Document(new Tag("catalog"));
96     foreach (book;books)
97     {
98         auto element = new Element("book");
99         element.tag.attr["id"] = book.id;
100 
101         element ~= new Element("author",      book.author);
102         element ~= new Element("title",       book.title);
103         element ~= new Element("genre",       book.genre);
104         element ~= new Element("price",       book.price);
105         element ~= new Element("publish-date",book.pubDate);
106         element ~= new Element("description", book.description);
107 
108         doc ~= element;
109     }
110 
111     // Pretty-print it
112     writeln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors:   Janice Caron
118 Source:    $(PHOBOSSRC std/xml.d)
119 */
120 /*
121          Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123    (See accompanying file LICENSE_1_0.txt or copy at
124          http://www.boost.org/LICENSE_1_0.txt)
125 */
126 module undead.xml;
127 
128 enum cdata = "<![CDATA[";
129 
/**
 * Tests whether a code point is a legal XML character.
 *
 * Accepted values are TAB (U+0009), LF (U+000A), CR (U+000D), everything from
 * U+0020 through U+D7FF, U+E000 through U+FFFD, and U+10000 through U+10FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a legal XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below U+0020 only the three whitespace controls are permitted.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // Space up to the last code point before the surrogate block.
    if (c <= 0xD7FF)
        return true;

    // The surrogate block and anything past U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // U+FFFE and U+FFFF are the only exclusions left in this range.
    return c != 0xFFFE && c != 0xFFFF;
}

@safe @nogc nothrow pure unittest
{
    // Code points on both sides of every range boundary.
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (code; accepted)
        assert( isChar(cast(dchar) code));
    foreach (code; rejected)
        assert(!isChar(cast(dchar) code));

    // Exhaustive cross-check against the lookup table (opt-in, slow).
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
191 
/**
 * Tests whether a character is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space, tab, carriage
 * return and linefeed. No other character is accepted.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case 0x20: // space
    case 0x09: // tab
    case 0x0A: // linefeed
    case 0x0D: // carriage return
        return true;
    default:
        return false;
    }
}
207 
/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * The ASCII digits U+0030 through U+0039 are answered directly; every other
 * character is looked up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path for '0' .. '9'; the table is consulted only when it fails.
    return (c >= 0x0030 && c <= 0x0039) || lookup(DigitTable,c);
}

@safe @nogc nothrow pure unittest
{
    // Exhaustive cross-check of the fast path against the table (opt-in, slow).
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}
232 
/**
 * Tests whether a character is a letter according to the XML standard.
 *
 * A letter is either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first; the base-character test runs
    // only when it fails.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
245 
/**
 * Tests whether a character is an ideographic character according to the
 * XML standard.
 *
 * Accepted values are U+3007, U+3021 through U+3029, and U+4E00 through
 * U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ideographic ranges
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    // The single code point plus both ends of each range.
    static immutable dchar[] samples =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];

    foreach (sample; samples)
        assert(isIdeographic(sample));

    // Exhaustive cross-check against the lookup table (opt-in, slow).
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
280 
/**
 * Tests whether a character is a base character according to the XML
 * standard.
 *
 * The answer comes from a lookup of c in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the table lookup for c
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    const listed = lookup(BaseCharTable,c);
    return listed;
}
294 
/**
 * Tests whether a character is a combining character according to the XML
 * standard.
 *
 * The answer comes from a lookup of c in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the table lookup for c
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    const listed = lookup(CombiningCharTable,c);
    return listed;
}
308 
/**
 * Tests whether a character is an extender according to the XML standard.
 *
 * The answer comes from a lookup of c in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the table lookup for c
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    const listed = lookup(ExtenderTable,c);
    return listed;
}
321 
/**
 * Escapes the five characters that have predefined XML entities.
 *
 * Ampersand, double quote, apostrophe, less-than and greater-than are
 * replaced by &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt;
 * respectively; decode() performs the reverse mapping. Both functions are
 * conveniences only: the undead.xml classes already encode and decode for
 * you.
 *
 * When the input contains none of those characters the original string is
 * returned as is, without copying.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writeln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t resumeAt;    // index of the first code unit not yet copied out
    bool replaced;      // becomes true once any entity has been written

    foreach (pos, unit; s)
    {
        string entity;
        if      (unit == '&')  entity = "&amp;";
        else if (unit == '"')  entity = "&quot;";
        else if (unit == '\'') entity = "&apos;";
        else if (unit == '<')  entity = "&lt;";
        else if (unit == '>')  entity = "&gt;";
        else continue;

        // Flush the untouched run before this character, then the entity.
        output.put(s[resumeAt .. pos]);
        output.put(entity);
        resumeAt = pos + 1;
        replaced = true;
    }

    // Nothing needed escaping: hand back the caller's own string.
    if (!replaced) return s;

    output.put(s[resumeAt .. $]);
    return output.data;
}

@safe pure unittest
{
    // Input without special characters comes back as the same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // One case per predefined entity: [input, expected].
    static immutable string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];

    foreach (pair; cases)
    {
        string input = pair[0];
        string expected = pair[1];
        assert(encode(input) == expected, encode(input));
    }
}
386 
/**
 * Mode to use for decoding.
 */
enum DecodeMode
{
    NONE,   /// Do not decode
    LOOSE,  /// Decode, but ignore errors
    STRICT  /// Decode, and throw exception on error
}
398 
/**
 * Unescapes the predefined XML entities and numeric character references in
 * a string.
 *
 * This is the inverse of encode(). The entities &amp;amp;, &amp;quot;,
 * &amp;apos;, &amp;lt; and &amp;gt; are replaced by the characters they
 * stand for, as are decimal and hexadecimal references such as
 * &amp;#x20AC;. Both functions are conveniences only: the undead.xml classes
 * already encode and decode for you.
 *
 * If the string does not contain an ampersand, the original is returned.
 *
 * The mode selects what happens to an ampersand that does not start a
 * recognised entity or reference: DecodeMode.NONE returns the input
 * untouched, DecodeMode.LOOSE copies the ampersand through unchanged, and
 * DecodeMode.STRICT throws a DecodeException.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    // Predefined entities, tried in this order once "&#" has been ruled out.
    static struct Predefined
    {
        string text;
        char value;
    }
    static immutable Predefined[5] predefined = [
        Predefined("&amp;",  '&'),
        Predefined("&quot;", '"'),
        Predefined("&apos;", '\''),
        Predefined("&lt;",   '<'),
        Predefined("&gt;",   '>'),
    ];

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first ampersand is met; a non-zero length means
    // "copying has started".
    string buffer;

    for (size_t i = 0; i < s.length; i++)
    {
        immutable char ch = s[i];

        if (ch != '&')
        {
            // Ordinary code unit: only copied once copying has started.
            if (buffer.length != 0) buffer ~= ch;
            continue;
        }

        // First ampersand: seed the buffer with everything before it.
        if (buffer.length == 0)
        {
            buffer = s[0 .. i].dup;
        }

        string rest = s[i..$];

        if (startsWith(rest,"&#"))
        {
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);
                char[4] temp;
                import std.utf : encode;
                buffer ~= temp[0 .. encode(temp, d)];
                // t now starts after the reference; leave i on its last
                // code unit so the loop increment moves past it.
                i = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
            continue;
        }

        bool matched = false;
        foreach (entity; predefined)
        {
            if (startsWith(rest,entity.text))
            {
                buffer ~= entity.value;
                // Skip the rest of the entity; the loop increment supplies
                // the final step.
                i += entity.text.length - 1;
                matched = true;
                break;
            }
        }

        if (!matched)
        {
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            buffer ~= '&';
        }
    }
    return (buffer.length == 0) ? s : buffer;
}

@safe pure unittest
{
    // True when strict decoding of the input raises a DecodeException.
    bool strictRejects(string input) pure
    {
        try { decode(input,DecodeMode.STRICT); }
        catch (DecodeException e) { return true; }
        return false;
    }

    // Input without an ampersand comes back as the same slice.
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    // Well-formed input: [encoded, decoded].
    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
        assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

    // Malformed input passes through unchanged in LOOSE mode and is
    // rejected in STRICT mode.
    static immutable string[] malformed = [
        "cat & dog",
        "a &gt b",
        "&#;",
        "&#x;",
        "&#2G;",
        "&#x2G;",
    ];
    foreach (input; malformed)
    {
        assert(decode(input, DecodeMode.LOOSE) == input);
        assert(strictRejects(input), input);
    }
}
523 
/**
 * Class representing an XML document.
 *
 * A Document is the root Element together with the text found before it
 * (the prolog) and after it (the epilog).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";
    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text. Must not be empty.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // Everything in s that precedes the root tag's text is the prolog.
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Whatever the parser has left unread becomes the epilog.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Compares two Documents for equality
         *
         * Two documents are equal when their prologs, their element
         * content and their epilogs are all equal.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const scope other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /**
         * Compares two Documents
         *
         * The prolog is compared first, then the element content, then the
         * epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const scope other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            immutable int elementOrder = this.Element.opCmp(other);
            if (elementOrder != 0)
                return elementOrder;

            if (epilog != other.epilog)
                return epilog < other.epilog ? -1 : 1;

            return 0;
        }

        /**
         * Returns the hash of a Document
         *
         * The element hash is folded with the epilog and then the prolog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            const elementHash = (cast() this).Element.toHash();
            const withEpilog = hash(epilog, elementHash);
            return hash(prolog, withEpilog);
        }

        /**
         * Returns the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString() scope const @safe
        {
            const elementXml = super.toString();
            return prolog ~ elementXml ~ epilog;
        }
    }
}
645 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    // Documents parsed from the same text must compare equal, must not
    // order before one another, and must work as associative array keys.
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Giving one document an extra child makes it order after the other.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
663 
/**
 * Class representing an XML element.
 *
 * An Element holds a start (or empty) Tag and an ordered list of child
 * items. Every appended child is also recorded in the array for its kind
 * (texts, cdatas, comments, pis or elements).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. An empty or null
     *          string adds no Text child.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and tag string), not shared, and
     * the copy starts out as an empty tag.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * auto element = new Element("p");
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * auto element = new Element("p");
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * auto element = new Element("p");
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * auto element = new Element("p");
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * auto element = new Element("p");
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order and promotes an empty tag to a
    // start tag as soon as a child with non-empty XML is added.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from the parser: every text, CDATA, comment and
    // processing instruction is appended, and every child start tag is
    // turned into a recursively parsed child Element.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    // Orders two tags by name, then by attribute count, then by their
    // attributes taken in sorted key order. Returns 0 exactly when name and
    // attributes are equal, which is the same tag test opEquals applies.
    private static int compareTags(const Tag lhs, const Tag rhs) @safe
    {
        // Keys are copied out and sorted so the order does not depend on
        // associative array iteration order.
        static string[] sortedKeys(const string[string] table) @safe
        {
            import std.algorithm.sorting : sort;

            string[] keys;
            foreach (key, value; table) keys ~= key;
            sort(keys);
            return keys;
        }

        if (lhs.name != rhs.name)
            return lhs.name < rhs.name ? -1 : 1;

        if (lhs.attr.length != rhs.attr.length)
            return lhs.attr.length < rhs.attr.length ? -1 : 1;

        const lhsKeys = sortedKeys(lhs.attr);
        const rhsKeys = sortedKeys(rhs.attr);
        foreach (i; 0 .. lhsKeys.length)
        {
            if (lhsKeys[i] != rhsKeys[i])
                return lhsKeys[i] < rhsKeys[i] ? -1 : 1;

            const lhsValue = lhs.attr[lhsKeys[i]];
            const rhsValue = rhs.attr[rhsKeys[i]];
            if (lhsValue != rhsValue)
                return lhsValue < rhsValue ? -1 : 1;
        }
        return 0;
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements are equal when their tags have the same name and
     * attributes and their items are pairwise equal. The tag takes part in
     * the comparison because toHash() mixes in the tag's hash: elements
     * that compare equal must also hash alike to work as associative array
     * keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope element = toType!(const Element)(o);

        // NOTE(review): assumes Tag.toHash depends on nothing beyond the
        // tag's name and attributes; confirm against Tag.
        if (tag.name != element.tag.name) return false;
        if (tag.attr != element.tag.attr) return false;

        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared position by position; the first unequal pair
     * decides, and an element that runs out of items first orders lower.
     * When all items are equal the tags decide (name, then attributes), so
     * a result of 0 agrees with opEquals.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const scope element = toType!(const Element)(o);
        // size_t, not uint: the index is compared against array lengths.
        for (size_t i = 0; ; ++i)
        {
            immutable thisDone = i == items.length;
            immutable thatDone = i == element.items.length;
            if (thisDone && thatDone) return compareTags(tag, element.tag);
            if (thisDone) return -1;
            if (thatDone) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * The hash is the tag's hash plus the hashes of all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if any child item is
         *      not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                // Keep the cast const-correct: this method is const, so the
                // items are const too.
                auto t = cast(const(Text)) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields its empty tag, and an element whose only
         * child is a Text yields a single line. Otherwise the start and end
         * tags get lines of their own and every line produced by a child is
         * indented between them.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Pad on the left by widening the field by `indent`.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writeln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        /// True when the element has no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
977 
/**
 * Tag types.
 */
enum TagType
{
    START, /// Used for start tags
    END,   /// Used for end tags
    EMPTY  /// Used for empty tags
}
987 
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // text consumed by the parsing constructor

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // checkName is handed local copies, never the fields themselves.
        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException if the text cannot be parsed as a tag, including
     * when the text ends before the tag is complete.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil yields -1 when nothing matches. Using that value as a
        // slice bound raises a RangeError, so truncated input is reported as
        // a TagException instead, like the other parse failures below.
        static ptrdiff_t requireFound(ptrdiff_t index) @safe pure
        {
            if (index < 0) throw new TagException("");
            return index;
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first terminator or whitespace.
            ptrdiff_t i = requireFound(
                s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
            s = s[i .. $];

            // Attributes: key, optional whitespace, '=', optional whitespace,
            // then a quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = requireFound(
                    s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                reqc(s,'=');
                i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];

                // The value ends at the same quote character that opened it.
                immutable char quote = requireOneOf(s,"'\"");
                i = requireFound(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the characters that were actually consumed.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two Tags are equal when their names, attributes and types are all
         * equal.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name first, then by attributes, then by type.
         *
         * Bugs: attr is an associative array, so the attribute step compares
         * the arrays' addresses and the resulting order is not meaningful
         * (bug 10381).
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            if (name != tag.name)
                return name < tag.name ? -1 : 1;
            // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
            if (attr != tag.attr)
                return cast(void *) attr < cast(void*) tag.attr ? -1 : 1;
            if (type != tag.type)
                return type < tag.type ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writeln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing bracket;
            // attribute values are passed through encode().
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            // "<name attrs>"
            string toStartString() @safe { return toNonEndString() ~ ">"; }

            // "</name>" (attributes are never written on an end tag)
            string toEndString() @safe { return "</" ~ name ~ ">"; }

            // "<name attrs />"
            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1237 
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns: true only if o is a Comment with identical content.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * comment's content is less than, equal to, or greater than the other's.
     * Zero is also returned when o is not a Comment.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // The result must be computed as an int: combining the null test and
        // the three-way result with && collapses it to a bool, which can
        // never be negative.
        if (t is null || content == t.content)
            return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1320 
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" is rejected.
    assertThrown!CommentException(new Comment("--"));
}
1328 
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns: true only if o is a CData with identical content.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * section's content is less than, equal to, or greater than the other's.
     * Zero is also returned when o is not a CData.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // The result must be computed as an int: combining the null test and
        // the three-way result with && collapses it to a bool, which can
        // never be negative.
        if (t is null || content == t.content)
            return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1408 
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;     // stored already encoded

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns: true only if o is a Text with identical (encoded) content.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * section's content is less than, equal to, or greater than the other's.
     * Zero is also returned when o is not a Text.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // The result must be computed as an int: combining the null test and
        // the three-way result with && collapses it to a bool, which can
        // never be negative.
        if (t is null || content == t.content)
            return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return content.length == 0; }
}
1488 
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns: true only if o is an XMLInstruction with identical content.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * instruction's content is less than, equal to, or greater than the
     * other's. Zero is also returned when o is not an XMLInstruction.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // The result must be computed as an int: combining the null test and
        // the three-way result with && collapses it to a bool, which can
        // never be negative.
        if (t is null || content == t.content)
            return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XMLInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1568 
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns: true only if o is a ProcessingInstruction with identical
     * content.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero, or a positive value when this
     * instruction's content is less than, equal to, or greater than the
     * other's. Zero is also returned when o is not a ProcessingInstruction.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // The result must be computed as an int: combining the null test and
        // the three-way result with && collapses it to a bool, which can
        // never be negative.
        if (t is null || content == t.content)
            return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1648 
/**
 * Abstract base class for XML items
 *
 * Subclasses supply equality, ordering, hashing and string conversion; this
 * class adds a default pretty-printer built on toString.
 */
abstract class Item
{
    /// Tests for equality against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Orders this item relative to another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Computes the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Produces the string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * The default implementation strips surrounding whitespace from
     * toString() and returns it as a single line, or returns no lines at all
     * when nothing is left after stripping.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     *               (not used by this default implementation)
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// Reports whether the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1682 
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which asserts that the
     * text is non-empty and that check() accepts it.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length > 0);
        try
        {
            // Well-formedness is verified up front
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            // Surface the checker's explanation in the assertion message
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        this.xmlText = xmlText_;
        // s must point at the text before super() runs
        this.s = &this.xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1734 
@system unittest
{
    // The document's own items are the children of the root element.
    auto document = new Document("<root><child><grandchild/></child></root>");
    auto children = document.elements;

    assert(children.length == 1);
    assert(children[0].tag.name == "child");
    assert(document.items == children);
}
1742 
1743 /**
1744  * Class for parsing an XML element.
1745  *
1746  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1747  *
1748  * Note that you cannot construct instances of this class directly. You can
1749  * construct a DocumentParser (which is a subclass of ElementParser), but
1750  * otherwise, Instances of ElementParser will be created for you by the
1751  * library, and passed your way via onStartTag handlers.
1752  *
1753  */
1754 class ElementParser
1755 {
1756     alias Handler = void delegate(string);
1757     alias ElementHandler = void delegate(in Element element);
1758     alias ParserHandler = void delegate(ElementParser parser);
1759 
1760     private
1761     {
1762         Tag tag_;
1763         string elementStart;
1764         string* s;
1765 
1766         Handler commentHandler = null;
1767         Handler cdataHandler = null;
1768         Handler xiHandler = null;
1769         Handler piHandler = null;
1770         Handler rawTextHandler = null;
1771         Handler textHandler = null;
1772 
1773         // Private constructor for start tags
1774         this(ElementParser parent) @safe @nogc pure nothrow
1775         {
1776             s = parent.s;
1777             this();
1778             tag_ = parent.tag_;
1779         }
1780 
1781         // Private constructor for empty tags
1782         this(Tag tag, string* t) @safe @nogc pure nothrow
1783         {
1784             s = t;
1785             this();
1786             tag_ = tag;
1787         }
1788     }
1789 
1790     /**
1791      * The Tag at the start of the element being parsed. You can read this to
1792      * determine the tag's name and attributes.
1793      */
1794     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1795 
1796     /**
1797      * Register a handler which will be called whenever a start tag is
1798      * encountered which matches the specified name. You can also pass null as
1799      * the name, in which case the handler will be called for any unmatched
1800      * start tag.
1801      *
1802      * Example:
1803      * --------------
1804      * // Call this function whenever a <podcast> start tag is encountered
1805      * onStartTag["podcast"] = (ElementParser xml)
1806      * {
1807      *     // Your code here
1808      *     //
1809      *     // This is a closure, so code here may reference
1810      *     // variables which are outside of this scope
1811      * };
1812      *
1813      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1814      * // start tag is encountered
1815      * onStartTag["episode"] = &myEpisodeStartHandler;
1816      *
1817      * // call delegate dg for all other start tags
1818      * onStartTag[null] = dg;
1819      * --------------
1820      *
1821      * This library will supply your function with a new instance of
1822      * ElementParser, which may be used to parse inside the element whose
1823      * start tag was just found, or to identify the tag attributes of the
1824      * element, etc.
1825      *
1826      * Note that your function will be called for both start tags and empty
1827      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1828      * and &lt;br/&gt;.
1829      */
1830     ParserHandler[string] onStartTag;
1831 
1832     /**
1833      * Register a handler which will be called whenever an end tag is
1834      * encountered which matches the specified name. You can also pass null as
1835      * the name, in which case the handler will be called for any unmatched
1836      * end tag.
1837      *
1838      * Example:
1839      * --------------
1840      * // Call this function whenever a </podcast> end tag is encountered
1841      * onEndTag["podcast"] = (in Element e)
1842      * {
1843      *     // Your code here
1844      *     //
1845      *     // This is a closure, so code here may reference
1846      *     // variables which are outside of this scope
1847      * };
1848      *
1849      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1850      * // end tag is encountered
1851      * onEndTag["episode"] = &myEpisodeEndHandler;
1852      *
1853      * // call delegate dg for all other end tags
1854      * onEndTag[null] = dg;
1855      * --------------
1856      *
1857      * Note that your function will be called for both start tags and empty
1858      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1859      * and &lt;br/&gt;.
1860      */
1861     ElementHandler[string] onEndTag;
1862 
1863     protected this() @safe @nogc pure nothrow
1864     {
1865         elementStart = *s;
1866     }
1867 
1868     /**
1869      * Register a handler which will be called whenever text is encountered.
1870      *
1871      * Example:
1872      * --------------
1873      * // Call this function whenever text is encountered
1874      * onText = (string s)
1875      * {
1876      *     // Your code here
1877      *
1878      *     // The passed parameter s will have been decoded by the time you see
1879      *     // it, and so may contain any character.
1880      *     //
1881      *     // This is a closure, so code here may reference
1882      *     // variables which are outside of this scope
1883      * };
1884      * --------------
1885      */
1886     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1887 
1888     /**
1889      * Register an alternative handler which will be called whenever text
1890      * is encountered. This differs from onText in that onText will decode
1891      * the text, whereas onTextRaw will not. This allows you to make design
1892      * choices, since onText will be more accurate, but slower, while
1893      * onTextRaw will be faster, but less accurate. Of course, you can
1894      * still call decode() within your handler, if you want, but you'd
1895      * probably want to use onTextRaw only in circumstances where you
1896      * know that decoding is unnecessary.
1897      *
1898      * Example:
1899      * --------------
1900      * // Call this function whenever text is encountered
1901      * onTextRaw = (string s)
1902      * {
1903      *     // Your code here
1904      *
1905      *     // The passed parameter s will NOT have been decoded.
1906      *     //
1907      *     // This is a closure, so code here may reference
1908      *     // variables which are outside of this scope
1909      * };
1910      * --------------
1911      */
1912     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1913 
1914     /**
1915      * Register a handler which will be called whenever a character data
1916      * segment is encountered.
1917      *
1918      * Example:
1919      * --------------
1920      * // Call this function whenever a CData section is encountered
1921      * onCData = (string s)
1922      * {
1923      *     // Your code here
1924      *
1925      *     // The passed parameter s does not include the opening <![CDATA[
1926      *     // nor closing ]]>
1927      *     //
1928      *     // This is a closure, so code here may reference
1929      *     // variables which are outside of this scope
1930      * };
1931      * --------------
1932      */
1933     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1934 
1935     /**
1936      * Register a handler which will be called whenever a comment is
1937      * encountered.
1938      *
1939      * Example:
1940      * --------------
1941      * // Call this function whenever a comment is encountered
1942      * onComment = (string s)
1943      * {
1944      *     // Your code here
1945      *
1946      *     // The passed parameter s does not include the opening <!-- nor
1947      *     // closing -->
1948      *     //
1949      *     // This is a a closure, so code here may reference
1950      *     // variables which are outside of this scope
1951      * };
1952      * --------------
1953      */
1954     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1955 
1956     /**
1957      * Register a handler which will be called whenever a processing
1958      * instruction is encountered.
1959      *
1960      * Example:
1961      * --------------
1962      * // Call this function whenever a processing instruction is encountered
1963      * onPI = (string s)
1964      * {
1965      *     // Your code here
1966      *
1967      *     // The passed parameter s does not include the opening <? nor
1968      *     // closing ?>
1969      *     //
1970      *     // This is a a closure, so code here may reference
1971      *     // variables which are outside of this scope
1972      * };
1973      * --------------
1974      */
1975     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1976 
1977     /**
1978      * Register a handler which will be called whenever an XML instruction is
1979      * encountered.
1980      *
1981      * Example:
1982      * --------------
1983      * // Call this function whenever an XML instruction is encountered
1984      * // (Note: XML instructions may only occur preceding the root tag of a
1985      * // document).
1986      * onPI = (string s)
1987      * {
1988      *     // Your code here
1989      *
1990      *     // The passed parameter s does not include the opening <! nor
1991      *     // closing >
1992      *     //
1993      *     // This is a a closure, so code here may reference
1994      *     // variables which are outside of this scope
1995      * };
1996      * --------------
1997      */
1998     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
1999 
    /**
     * Parse an XML element.
     *
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * The remaining input is consumed from the front, one item per loop
     * iteration. Items are recognised in this order: comment, CDATA section,
     * XML instruction, processing instruction, tag, plain text.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        // The tag this parser was positioned on when parse() was entered;
        // it is compared against end tags to decide when to stop.
        const Tag root = tag_;
        // Most recent start tag seen for each tag name, looked up again
        // when the matching end tag arrives.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        // NOTE(review): in every branch below indexOf yields -1 when the
        // terminator is missing; how chop treats that value is not visible
        // from here, so unterminated input should be confirmed as handled.
        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                // Comment: drop "<!--", hand over the body, drop "-->"
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                // CDATA section: the handler receives the undelimited body
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                // XML instruction: everything between "<!" and the next ">"
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                // Processing instruction: everything between "<?" and "?>"
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                // Any other "<" starts a tag; Tag's constructor is handed the
                // remaining input.
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto parser = new ElementParser(this);

                    // Prefer the handler registered for this tag name and
                    // fall back to the one registered under the null key.
                    auto handler = tag_.name in onStartTag;
                    if (handler !is null) (*handler)(parser);
                    else
                    {
                        handler = null in onStartTag;
                        if (handler !is null) (*handler)(parser);
                    }
                }
                else if (tag_.isEnd)
                {
                    // NOTE(review): this index expression assumes a start tag
                    // with the same name was recorded earlier; presumably
                    // callers run check() first -- confirm.
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element's text is the source lying between the end
                    // of the start tag (p) and the start of this end tag (q).
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    // Same lookup rule as for start tags: by name, then the
                    // null-key handler.
                    auto handler = tag_.name in onEndTag;
                    if (handler !is null) (*handler)(element);
                    else
                    {
                        handler = null in onEndTag;
                        if (handler !is null) (*handler)(element);
                    }

                    // An end tag carrying the root's name finishes this parse
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    // An empty-element tag is reported as a start tag
                    // immediately followed by an end tag.
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy, for bug 2979
                    // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag
                    // (the parser is given an empty string s2 as its input)
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    auto handler1 = startTag.name in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                    else
                    {
                        handler1 = null in onStartTag;
                        if (handler1 !is null) (*handler1)(parser);
                    }

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    auto handler2 = tag_.name in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                    else
                    {
                        handler2 = null in onEndTag;
                        if (handler2 !is null) (*handler2)(element);
                    }
                }
            }
            else
            {
                // Plain text up to the next "<". A raw handler, if set, takes
                // precedence; otherwise the text handler gets decoded text.
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }
2137 
2138     /**
2139      * Returns that part of the element which has already been parsed
2140      */
2141     override string toString() const @nogc @safe pure nothrow
2142     {
2143         assert(elementStart.length >= s.length);
2144         return elementStart[0 .. elementStart.length - s.length];
2145     }
2146 
2147 }
2148 
2149 private
2150 {
2151     template Check(string msg)
2152     {
2153         string old = s;
2154 
2155         void fail() @safe pure
2156         {
2157             s = old;
2158             throw new Err(s,msg);
2159         }
2160 
2161         void fail(Err e) @safe pure
2162         {
2163             s = old;
2164             throw new Err(s,msg,e);
2165         }
2166 
2167         void fail(string msg2) @safe pure
2168         {
2169             fail(new Err(s,msg2));
2170         }
2171     }
2172 
2173     void checkMisc(ref string s) @safe pure // rule 27
2174     {
2175         import std.algorithm.searching : startsWith;
2176 
2177         mixin Check!("Misc");
2178 
2179         try
2180         {
2181                  if (s.startsWith("<!--")) { checkComment(s); }
2182             else if (s.startsWith("<?"))   { checkPI(s); }
2183             else                           { checkSpace(s); }
2184         }
2185         catch (Err e) { fail(e); }
2186     }
2187 
2188     void checkDocument(ref string s) @safe pure // rule 1
2189     {
2190         mixin Check!("Document");
2191         try
2192         {
2193             checkProlog(s);
2194             checkElement(s);
2195             star!(checkMisc)(s);
2196         }
2197         catch (Err e) { fail(e); }
2198     }
2199 
2200     void checkChars(ref string s) @safe pure // rule 2
2201     {
2202         // TO DO - Fix std.utf stride and decode functions, then use those
2203         // instead
2204         import std.format : format;
2205 
2206         mixin Check!("Chars");
2207 
2208         dchar c;
2209         ptrdiff_t n = -1;
2210         // 'i' must not be smaller than size_t because size_t is used internally in
2211         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2212         foreach (size_t i, dchar d; s)
2213         {
2214             if (!isChar(d))
2215             {
2216                 c = d;
2217                 n = i;
2218                 break;
2219             }
2220         }
2221         if (n != -1)
2222         {
2223             s = s[n..$];
2224             fail(format("invalid character: U+%04X",c));
2225         }
2226     }
2227 
2228     void checkSpace(ref string s) @safe pure // rule 3
2229     {
2230         import std.algorithm.searching : countUntil;
2231         import std.ascii : isWhite;
2232         import std.utf : byCodeUnit;
2233 
2234         mixin Check!("Whitespace");
2235         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2236         if (i == -1 && s.length > 0 && isWhite(s[0]))
2237             s = s[$ .. $];
2238         else if (i > -1)
2239             s = s[i .. $];
2240         if (s is old) fail();
2241     }
2242 
2243     void checkName(ref string s, out string name) @safe pure // rule 5
2244     {
2245         mixin Check!("Name");
2246 
2247         if (s.length == 0) fail();
2248         ptrdiff_t n;
2249         // 'i' must not be smaller than size_t because size_t is used internally in
2250         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2251         foreach (size_t i, dchar c; s)
2252         {
2253             if (c == '_' || c == ':' || isLetter(c)) continue;
2254             if (i == 0) fail();
2255             if (c == '-' || c == '.' || isDigit(c)
2256                 || isCombiningChar(c) || isExtender(c)) continue;
2257             n = i;
2258             break;
2259         }
2260         name = s[0 .. n];
2261         s = s[n..$];
2262     }
2263 
2264     void checkAttValue(ref string s) @safe pure // rule 10
2265     {
2266         import std.algorithm.searching : countUntil;
2267         import std.utf : byCodeUnit;
2268 
2269         mixin Check!("AttValue");
2270 
2271         if (s.length == 0) fail();
2272         char c = s[0];
2273         if (c != '\u0022' && c != '\u0027')
2274             fail("attribute value requires quotes");
2275         s = s[1..$];
2276         for (;;)
2277         {
2278             s = s[s.byCodeUnit.countUntil(c) .. $];
2279             if (s.length == 0) fail("unterminated attribute value");
2280             if (s[0] == '<') fail("< found in attribute value");
2281             if (s[0] == c) break;
2282             try { checkReference(s); } catch (Err e) { fail(e); }
2283         }
2284         s = s[1..$];
2285     }
2286 
2287     void checkCharData(ref string s) @safe pure // rule 14
2288     {
2289         import std.algorithm.searching : startsWith;
2290 
2291         mixin Check!("CharData");
2292 
2293         while (s.length != 0)
2294         {
2295             if (s.startsWith("&")) break;
2296             if (s.startsWith("<")) break;
2297             if (s.startsWith("]]>")) fail("]]> found within char data");
2298             s = s[1..$];
2299         }
2300     }
2301 
2302     void checkComment(ref string s) @safe pure // rule 15
2303     {
2304         import std.string : indexOf;
2305 
2306         mixin Check!("Comment");
2307 
2308         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2309         ptrdiff_t n = s.indexOf("--");
2310         if (n == -1) fail("unterminated comment");
2311         s = s[n..$];
2312         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2313     }
2314 
2315     void checkPI(ref string s) @safe pure // rule 16
2316     {
2317         mixin Check!("PI");
2318 
2319         try
2320         {
2321             checkLiteral("<?",s);
2322             checkEnd("?>",s);
2323         }
2324         catch (Err e) { fail(e); }
2325     }
2326 
2327     void checkCDSect(ref string s) @safe pure // rule 18
2328     {
2329         mixin Check!("CDSect");
2330 
2331         try
2332         {
2333             checkLiteral(cdata,s);
2334             checkEnd("]]>",s);
2335         }
2336         catch (Err e) { fail(e); }
2337     }
2338 
2339     void checkProlog(ref string s) @safe pure // rule 22
2340     {
2341         mixin Check!("Prolog");
2342 
2343         try
2344         {
2345             /* The XML declaration is optional
2346              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2347              */
2348             opt!(checkXMLDecl)(s);
2349 
2350             star!(checkMisc)(s);
2351             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2352         }
2353         catch (Err e) { fail(e); }
2354     }
2355 
2356     void checkXMLDecl(ref string s) @safe pure // rule 23
2357     {
2358         mixin Check!("XMLDecl");
2359 
2360         try
2361         {
2362             checkLiteral("<?xml",s);
2363             checkVersionInfo(s);
2364             opt!(checkEncodingDecl)(s);
2365             opt!(checkSDDecl)(s);
2366             opt!(checkSpace)(s);
2367             checkLiteral("?>",s);
2368         }
2369         catch (Err e) { fail(e); }
2370     }
2371 
2372     void checkVersionInfo(ref string s) @safe pure // rule 24
2373     {
2374         mixin Check!("VersionInfo");
2375 
2376         try
2377         {
2378             checkSpace(s);
2379             checkLiteral("version",s);
2380             checkEq(s);
2381             quoted!(checkVersionNum)(s);
2382         }
2383         catch (Err e) { fail(e); }
2384     }
2385 
2386     void checkEq(ref string s) @safe pure // rule 25
2387     {
2388         mixin Check!("Eq");
2389 
2390         try
2391         {
2392             opt!(checkSpace)(s);
2393             checkLiteral("=",s);
2394             opt!(checkSpace)(s);
2395         }
2396         catch (Err e) { fail(e); }
2397     }
2398 
2399     void checkVersionNum(ref string s) @safe pure // rule 26
2400     {
2401         import std.algorithm.searching : countUntil;
2402         import std.utf : byCodeUnit;
2403 
2404         mixin Check!("VersionNum");
2405 
2406         s = s[s.byCodeUnit.countUntil('\"') .. $];
2407         if (s is old) fail();
2408     }
2409 
2410     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2411     {
2412         mixin Check!("DocTypeDecl");
2413 
2414         try
2415         {
2416             checkLiteral("<!DOCTYPE",s);
2417             //
2418             // TO DO -- ensure DOCTYPE is well formed
2419             // (But not yet. That's one of our "future directions")
2420             //
2421             checkEnd(">",s);
2422         }
2423         catch (Err e) { fail(e); }
2424     }
2425 
2426     void checkSDDecl(ref string s) @safe pure // rule 32
2427     {
2428         import std.algorithm.searching : startsWith;
2429 
2430         mixin Check!("SDDecl");
2431 
2432         try
2433         {
2434             checkSpace(s);
2435             checkLiteral("standalone",s);
2436             checkEq(s);
2437         }
2438         catch (Err e) { fail(e); }
2439 
2440         int n = 0;
2441              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2442         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2443         else fail("standalone attribute value must be 'yes', \"yes\","~
2444             " 'no' or \"no\"");
2445         s = s[n..$];
2446     }
2447 
2448     void checkElement(ref string s) @safe pure // rule 39
2449     {
2450         mixin Check!("Element");
2451 
2452         string sname,ename,t;
2453         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2454 
2455         if (t == "STag")
2456         {
2457             try
2458             {
2459                 checkContent(s);
2460                 t = s;
2461                 checkETag(s,ename);
2462             }
2463             catch (Err e) { fail(e); }
2464 
2465             if (sname != ename)
2466             {
2467                 s = t;
2468                 fail("end tag name \"" ~ ename
2469                     ~ "\" differs from start tag name \""~sname~"\"");
2470             }
2471         }
2472     }
2473 
2474     // rules 40 and 44
2475     void checkTag(ref string s, out string type, out string name) @safe pure
2476     {
2477         mixin Check!("Tag");
2478 
2479         try
2480         {
2481             type = "STag";
2482             checkLiteral("<",s);
2483             checkName(s,name);
2484             star!(seq!(checkSpace,checkAttribute))(s);
2485             opt!(checkSpace)(s);
2486             if (s.length != 0 && s[0] == '/')
2487             {
2488                 s = s[1..$];
2489                 type = "ETag";
2490             }
2491             checkLiteral(">",s);
2492         }
2493         catch (Err e) { fail(e); }
2494     }
2495 
2496     void checkAttribute(ref string s) @safe pure // rule 41
2497     {
2498         mixin Check!("Attribute");
2499 
2500         try
2501         {
2502             string name;
2503             checkName(s,name);
2504             checkEq(s);
2505             checkAttValue(s);
2506         }
2507         catch (Err e) { fail(e); }
2508     }
2509 
2510     void checkETag(ref string s, out string name) @safe pure // rule 42
2511     {
2512         mixin Check!("ETag");
2513 
2514         try
2515         {
2516             checkLiteral("</",s);
2517             checkName(s,name);
2518             opt!(checkSpace)(s);
2519             checkLiteral(">",s);
2520         }
2521         catch (Err e) { fail(e); }
2522     }
2523 
2524     void checkContent(ref string s) @safe pure // rule 43
2525     {
2526         import std.algorithm.searching : startsWith;
2527 
2528         mixin Check!("Content");
2529 
2530         try
2531         {
2532             while (s.length != 0)
2533             {
2534                 old = s;
2535                      if (s.startsWith("&"))        { checkReference(s); }
2536                 else if (s.startsWith("<!--"))     { checkComment(s); }
2537                 else if (s.startsWith("<?"))       { checkPI(s); }
2538                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2539                 else if (s.startsWith("</"))       { break; }
2540                 else if (s.startsWith("<"))        { checkElement(s); }
2541                 else                               { checkCharData(s); }
2542             }
2543         }
2544         catch (Err e) { fail(e); }
2545     }
2546 
2547     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2548     {
2549         import std.format : format;
2550 
2551         mixin Check!("CharRef");
2552 
2553         c = 0;
2554         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2555         int radix = 10;
2556         if (s.length != 0 && s[0] == 'x')
2557         {
2558             s = s[1..$];
2559             radix = 16;
2560         }
2561         if (s.length == 0) fail("unterminated character reference");
2562         if (s[0] == ';')
2563             fail("character reference must have at least one digit");
2564         while (s.length != 0)
2565         {
2566             immutable char d = s[0];
2567             int n = 0;
2568             switch (d)
2569             {
2570                 case 'F','f': ++n;      goto case;
2571                 case 'E','e': ++n;      goto case;
2572                 case 'D','d': ++n;      goto case;
2573                 case 'C','c': ++n;      goto case;
2574                 case 'B','b': ++n;      goto case;
2575                 case 'A','a': ++n;      goto case;
2576                 case '9':     ++n;      goto case;
2577                 case '8':     ++n;      goto case;
2578                 case '7':     ++n;      goto case;
2579                 case '6':     ++n;      goto case;
2580                 case '5':     ++n;      goto case;
2581                 case '4':     ++n;      goto case;
2582                 case '3':     ++n;      goto case;
2583                 case '2':     ++n;      goto case;
2584                 case '1':     ++n;      goto case;
2585                 case '0':     break;
2586                 default: n = 100; break;
2587             }
2588             if (n >= radix) break;
2589             c *= radix;
2590             c += n;
2591             s = s[1..$];
2592         }
2593         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2594         if (s.length == 0 || s[0] != ';') fail("expected ;");
2595         else s = s[1..$];
2596     }
2597 
2598     void checkReference(ref string s) @safe pure // rule 67
2599     {
2600         import std.algorithm.searching : startsWith;
2601 
2602         mixin Check!("Reference");
2603 
2604         try
2605         {
2606             dchar c;
2607             if (s.startsWith("&#")) checkCharRef(s,c);
2608             else checkEntityRef(s);
2609         }
2610         catch (Err e) { fail(e); }
2611     }
2612 
2613     void checkEntityRef(ref string s) @safe pure // rule 68
2614     {
2615         mixin Check!("EntityRef");
2616 
2617         try
2618         {
2619             string name;
2620             checkLiteral("&",s);
2621             checkName(s,name);
2622             checkLiteral(";",s);
2623         }
2624         catch (Err e) { fail(e); }
2625     }
2626 
2627     void checkEncName(ref string s) @safe pure // rule 81
2628     {
2629         import std.algorithm.searching : countUntil;
2630         import std.ascii : isAlpha;
2631         import std.utf : byCodeUnit;
2632 
2633         mixin Check!("EncName");
2634 
2635         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2636         if (s is old) fail();
2637         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2638     }
2639 
2640     void checkEncodingDecl(ref string s) @safe pure // rule 80
2641     {
2642         mixin Check!("EncodingDecl");
2643 
2644         try
2645         {
2646             checkSpace(s);
2647             checkLiteral("encoding",s);
2648             checkEq(s);
2649             quoted!(checkEncName)(s);
2650         }
2651         catch (Err e) { fail(e); }
2652     }
2653 
2654     // Helper functions
2655 
2656     void checkLiteral(string literal,ref string s) @safe pure
2657     {
2658         import std.string : startsWith;
2659 
2660         mixin Check!("Literal");
2661 
2662         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2663         s = s[literal.length..$];
2664     }
2665 
2666     void checkEnd(string end,ref string s) @safe pure
2667     {
2668         import std.string : indexOf;
2669         // Deliberately no mixin Check here.
2670 
2671         auto n = s.indexOf(end);
2672         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2673         s = s[n..$];
2674         checkLiteral(end,s);
2675     }
2676 
2677     // Metafunctions -- none of these use mixin Check
2678 
2679     void opt(alias f)(ref string s)
2680     {
2681         try { f(s); } catch (Err e) {}
2682     }
2683 
2684     void plus(alias f)(ref string s)
2685     {
2686         f(s);
2687         star!(f)(s);
2688     }
2689 
2690     void star(alias f)(ref string s)
2691     {
2692         while (s.length != 0)
2693         {
2694             try { f(s); }
2695             catch (Err e) { return; }
2696         }
2697     }
2698 
2699     void quoted(alias f)(ref string s)
2700     {
2701         import std.string : startsWith;
2702 
2703         if (s.startsWith("'"))
2704         {
2705             checkLiteral("'",s);
2706             f(s);
2707             checkLiteral("'",s);
2708         }
2709         else
2710         {
2711             checkLiteral("\"",s);
2712             f(s);
2713             checkLiteral("\"",s);
2714         }
2715     }
2716 
2717     void seq(alias f,alias g)(ref string s)
2718     {
2719         f(s);
2720         g(s);
2721     }
2722 }
2723 
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // The checkers advance s as they go, so the complete document is kept
    // separately for computing line and column numbers. Using s for that
    // purpose reported "Junk found after document" at line 1, column 1,
    // because by then s held only the junk itself.
    string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2750 
// check() must reject a document in which an end tag (</genres>) does not
// match its start tag (<genre>), and the report must name both tags.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        // Reaching this point means the malformed document was accepted
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2799 
// check() must accept a well-formed document that has a comment between
// sibling elements; any CheckException fails the test with its report.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        assert(0, e.toString());
    }
}
2819 
// Attribute values may contain the other kind of quote character: the
// start-tag handler must see such values intact.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    // Set by the handler so the test can tell that it actually ran
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}
2839 
// Entity references must arrive decoded both in attribute values (seen by
// the start-tag handler) and in element text (seen by the end-tag handler).
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}
2858 
// A Document built from an empty-element tag whose attribute contains
// entity references must convert back to exactly the same string.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}
2865 
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Creates the exception with the given message.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2868 
2869 // Other exceptions
2870 
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2874 
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2878 
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2882 
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2886 
/// Thrown during Text constructor
class TextException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2890 
/// Thrown during decode()
class DecodeException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2894 
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2898 
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private: only code in this module can create one
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2902 
/**
 * Thrown during check()
 *
 * Instances can be chained through `err`; toString() prints the chained
 * exception's text first, followed by this exception's own line.
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    // Trailing part of the checked text; only its length is used, by
    // complete(), to locate the failure position inside the whole input.
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    /*
     * Params:
     *   tail = trailing part of the input, starting at the failure position
     *   msg  = production rule name or specific error message
     *   err  = chained exception, or null
     */
    private this(string tail,string msg,Err err=null) @safe pure
    {
        // The `msg` field declared above hides Throwable.msg. Forward the
        // message to the base class too, so that handlers which only hold an
        // Exception/Throwable reference still see it instead of null.
        super(msg);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Fills in `line` and `column` (both 1-based) for this exception and for
     * every exception chained through `err`.
     *
     * Params:
     *   entire = the complete text, of which `tail` is the trailing part
     */
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure position.
        string head = entire[0..$-tail.length];
        // Start of the line containing the failure (0 when there is no '\n',
        // because lastIndexOf then yields -1).
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Convert to UTF-32 so the column counts code points, not UTF-8 bytes.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /*
     * Returns: one newline-terminated line per exception in the chain, the
     * chained (`err`) text first. The "Line L, column C: " prefix appears
     * only once a position has been recorded (line != 0).
     */
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}

// Short internal name for CheckException.
private alias Err = CheckException;
2953 
2954 // Private helper functions
2955 
2956 private
2957 {
2958     inout(T) toType(T)(inout return scope Object o)
2959     {
2960         T t = cast(T)(o);
2961         if (t is null)
2962         {
2963             throw new InvalidTypeException("Attempt to compare a "
2964                 ~ T.stringof ~ " with an instance of another type");
2965         }
2966         return t;
2967     }
2968 
2969     string chop(ref string s, size_t n) @safe pure nothrow
2970     {
2971         if (n == -1) n = s.length;
2972         string t = s[0 .. n];
2973         s = s[n..$];
2974         return t;
2975     }
2976 
2977     bool optc(ref string s, char c) @safe pure nothrow
2978     {
2979         immutable bool b = s.length != 0 && s[0] == c;
2980         if (b) s = s[1..$];
2981         return b;
2982     }
2983 
2984     void reqc(ref string s, char c) @safe pure
2985     {
2986         if (s.length == 0 || s[0] != c) throw new TagException("");
2987         s = s[1..$];
2988     }
2989 
2990     char requireOneOf(ref string s, string chars) @safe pure
2991     {
2992         import std.string : indexOf;
2993 
2994         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2995             throw new TagException("");
2996         immutable char ch = s[0];
2997         s = s[1..$];
2998         return ch;
2999     }
3000 
    // Short local name for hashOf; the leading dot makes the lookup start at
    // module scope rather than in any enclosing scope.
    alias hash = .hashOf;
3002 
3003     // Definitions from the XML specification
    // Code point ranges for the "Char" class. The array is a flat list of
    // inclusive (first, last) pairs in ascending order; a single code point
    // appears as a pair with both entries equal. This is the layout that
    // lookup() in this block searches (callers are outside this view).
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    // Code point ranges for the "BaseChar" class, stored as a flat list of
    // inclusive (first, last) pairs in ascending order; a single code point
    // appears as a pair with both entries equal.
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    // Code point ranges for the "Ideographic" class, as inclusive
    // (first, last) pairs in ascending order.
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    // Code point ranges for the "CombiningChar" class, as inclusive
    // (first, last) pairs in ascending order; a single code point appears as
    // a pair with both entries equal.
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    // Code point ranges for the "Digit" class, as inclusive (first, last)
    // pairs in ascending order.
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    // Code point ranges for the "Extender" class, as inclusive (first, last)
    // pairs in ascending order; a single code point appears as a pair with
    // both entries equal.
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];
3075 
3076     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3077     {
3078         while (table.length != 0)
3079         {
3080             auto m = (table.length >> 1) & ~1;
3081             if (c < table[m])
3082             {
3083                 table = table[0 .. m];
3084             }
3085             else if (c > table[m+1])
3086             {
3087                 table = table[m+2..$];
3088             }
3089             else return true;
3090         }
3091         return false;
3092     }
3093 
3094     string startOf(string s) @safe nothrow pure
3095     {
3096         string r;
3097         foreach (char c;s)
3098         {
3099             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3100             if (r.length >= 40) { r ~= "___"; break; }
3101         }
3102         return r;
3103     }
3104 
3105     void exit(string s=null)
3106     {
3107         throw new XMLException(s);
3108     }
3109 }