1 // Written in the D programming language.
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will remain until we have a suitable replacement,
6       but be aware that it will not remain long term.)
8 Classes and functions for creating and parsing XML
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
18 Example: This example creates a DOM (Document Object Model) tree
19     from an XML file.
20 ------------------------------------------------------------------------------
21 import undead.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
31 void main()
32 {
33     string s = cast(string) std.file.read("books.xml");
35     // Check for well-formedness
36     check(s);
38     // Make a DOM tree
39     auto doc = new Document(s);
41     // Plain-print it
42     writeln(doc);
43 }
44 ------------------------------------------------------------------------------
46 Example: This example does much the same thing, except that the file is
47     deconstructed and reconstructed by hand. This is more work, but the
48     techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
50 import undead.xml;
51 import std.stdio;
import std.string;
import std.file;
54 struct Book
55 {
56     string id;
57     string author;
58     string title;
59     string genre;
60     string price;
61     string pubDate;
62     string description;
63 }
65 void main()
66 {
67     string s = cast(string) std.file.read("books.xml");
69     // Check for well-formedness
70     check(s);
72     // Take it apart
73     Book[] books;
75     auto xml = new DocumentParser(s);
76     xml.onStartTag["book"] = (ElementParser xml)
77     {
78         Book book;
79         book.id = xml.tag.attr["id"];
81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
88         xml.parse();
90         books ~= book;
91     };
92     xml.parse();
94     // Put it back together again;
95     auto doc = new Document(new Tag("catalog"));
96     foreach (book;books)
97     {
98         auto element = new Element("book");
99         element.tag.attr["id"] = book.id;
101         element ~= new Element("author",      book.author);
102         element ~= new Element("title",       book.title);
103         element ~= new Element("genre",       book.genre);
104         element ~= new Element("price",       book.price);
105         element ~= new Element("publish-date",book.pubDate);
106         element ~= new Element("description", book.description);
108         doc ~= element;
109     }
111     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors:   Janice Caron
118 Source:    $(PHOBOSSRC std/xml.d)
119 */
120 /*
121          Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123    (See accompanying file LICENSE_1_0.txt or copy at
124          http://www.boost.org/LICENSE_1_0.txt)
125 */
126 module undead.xml;
// Opening delimiter of an XML CDATA section.
enum string cdata = "<![CDATA[";
/**
 * Tests whether a code point is a legal XML character (production 2 of the
 * XML grammar).
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 up to
 * U+D7FF, and everything from U+E000 up to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Control range: only the three whitespace controls are allowed.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // Everything else below the surrogate block is fine.
    if (c <= 0xD7FF)
        return true;

    // Surrogates and values beyond the Unicode range are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Within the upper range only U+FFFE and U+FFFF are excluded.
    return (c & 0x1FFFFE) != 0xFFFE;
}
@safe @nogc nothrow pure unittest
{
    // Boundary values on either side of every range isChar distinguishes.
    static immutable uint[] accepted = [
        0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF,
    ];
    static immutable uint[] rejected = [
        0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000,
    ];

    foreach (codePoint; accepted)
        assert( isChar(cast(dchar) codePoint));
    foreach (codePoint; rejected)
        assert(!isChar(cast(dchar) codePoint));

    // Optional exhaustive cross-check of the hard-coded test against the table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
/**
 * Tests whether a character counts as whitespace in XML.
 *
 * XML recognises exactly four whitespace characters: space (U+0020),
 * tab (U+0009), line feed (U+000A) and carriage return (U+000D).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case '\u0020':
    case '\u0009':
    case '\u000A':
    case '\u000D':
        return true;
    default:
        return false;
    }
}
/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * The ASCII digits U+0030 .. U+0039 are recognised directly; every other
 * code point is looked up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path: the table is only consulted for non-ASCII-digit input.
    immutable bool asciiDigit = c >= 0x0030 && c <= 0x0039;
    return asciiDigit || lookup(DigitTable, c);
}
@safe @nogc nothrow pure unittest
{
    // Exhaustive cross-check against DigitTable; only compiled when the
    // stdxml_TestHardcodedChecks debug identifier is enabled.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            assert(isDigit(codePoint) == lookup(DigitTable, codePoint));
        }
    }
}
/**
 * Tests whether a character is a letter according to the XML standard,
 * i.e. whether it is either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first; base characters are the fallback.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
/**
 * Tests whether a character is an ideographic character according to the
 * XML standard.
 *
 * The accepted code points are U+3007, U+3021 .. U+3029 and
 * U+4E00 .. U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ideographic ranges
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}
@safe @nogc nothrow pure unittest
{
    // The isolated code point plus both ends of each accepted range.
    static immutable dchar[] samples = ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (sample; samples)
        assert(isIdeographic(sample));

    // Optional exhaustive cross-check against IdeographicTable.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
/**
 * Tests whether a character is a base character according to the XML
 * standard. The answer comes from a lookup in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is found in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool inTable = lookup(BaseCharTable, c);
    return inTable;
}
/**
 * Tests whether a character is a combining character according to the XML
 * standard. The answer comes from a lookup in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is found in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool inTable = lookup(CombiningCharTable, c);
    return inTable;
}
/**
 * Tests whether a character is an extender according to the XML standard.
 * The answer comes from a lookup in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is found in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    immutable bool inTable = lookup(ExtenderTable, c);
    return inTable;
}
/**
 * Escapes a string for use in XML by substituting the predefined entities.
 *
 * The five characters ampersand, quote, apostrophe, less-than and
 * greater-than are replaced by &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt;
 * and &amp;gt; respectively; decode() performs the reverse mapping. Both
 * functions are conveniences only: the undead.xml classes already encode and
 * decode automatically.
 *
 * When the input contains none of the five characters, the original string
 * is returned unchanged (no copy is made).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writeln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto sink = appender!S();
    size_t copiedUpTo; // index just past the last character already emitted

    foreach (idx, ch; s)
    {
        string entity;
        switch (ch)
        {
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        default: continue; // ordinary character: copied later in one slice
        }

        // Flush the untouched run before this character, then the entity.
        sink.put(s[copiedUpTo .. idx]);
        sink.put(entity);
        copiedUpTo = idx + 1;
    }

    // Nothing was ever written, so nothing needed escaping.
    if (!sink.data.ptr) return s;

    sink.put(s[copiedUpTo .. $]);
    return sink.data;
}
@safe pure unittest
{
    // A string without special characters must come back as the same slice.
    auto plain = "hello";
    assert(encode(plain) is plain);

    // Input / expected-output pairs, one per predefined entity.
    string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];
    foreach (pair; cases)
        assert(encode(pair[0]) == pair[1], encode(pair[0]));
}
/**
 * Mode to use for decoding.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    // The three members are documented above and used by decode() and
    // Element.text(); an enum without members does not compile.
    NONE, LOOSE, STRICT
}
/**
 * Unescapes the predefined XML entities and character references in a string.
 *
 * This is the inverse of encode(): the entities &amp;amp;, &amp;quot;,
 * &amp;apos;, &amp;lt; and &amp;gt; are turned back into the characters they
 * stand for, and decimal or hexadecimal character references such as
 * &amp;#x20AC; are replaced by the referenced character. Both functions are
 * conveniences only: the undead.xml classes already encode and decode
 * automatically.
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * The "mode" parameter selects the error policy: DecodeMode.NONE returns the
 * input untouched, DecodeMode.LOOSE copies an ampersand that does not start a
 * recognised entity through literally, and DecodeMode.STRICT throws a
 * DecodeException in that situation.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is met; from then on it accumulates
    // the decoded output.
    string buffer;
    for (size_t i = 0; i < s.length; ++i)
    {
        immutable char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
            continue;
        }

        // First ampersand: seed the buffer with everything before it.
        if (buffer.length == 0)
        {
            buffer = s[0 .. i].dup;
        }

        const remainder = s[i .. $];

        if (startsWith(remainder, "&#"))
        {
            // Numeric character reference.
            try
            {
                dchar d;
                string t = s[i .. $];
                checkCharRef(t, d); // advances t past the reference
                char[4] temp;
                import std.utf : encode;
                immutable written = encode(temp, d);
                buffer ~= temp[0 .. written];
                // Leave i on the last consumed character; ++i moves past it.
                i = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
            continue;
        }

        // Named entity: work out the replacement and the entity's length.
        char replacement;
        size_t entityLength = 0;
        if      (startsWith(remainder, "&amp;" )) { replacement = '&';  entityLength = 5; }
        else if (startsWith(remainder, "&quot;")) { replacement = '"';  entityLength = 6; }
        else if (startsWith(remainder, "&apos;")) { replacement = '\''; entityLength = 6; }
        else if (startsWith(remainder, "&lt;"  )) { replacement = '<';  entityLength = 4; }
        else if (startsWith(remainder, "&gt;"  )) { replacement = '>';  entityLength = 4; }

        if (entityLength == 0)
        {
            // Bare ampersand that starts no known entity.
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            buffer ~= '&';
        }
        else
        {
            buffer ~= replacement;
            // Skip the rest of the entity; the loop's ++i consumes the ';'.
            i += entityLength - 1;
        }
    }
    return (buffer.length == 0) ? s : buffer;
}
@safe pure unittest
{
    // Passes only if strict decoding of s raises a DecodeException.
    void assertNot(string s) pure
    {
        bool threw = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { threw = true; }
        assert(threw,s);
    }

    // Assert that things that should work, do
    auto untouched = "hello";
    assert(decode(untouched, DecodeMode.STRICT) is untouched);

    string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
        assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

    // Malformed input: loose mode passes it through unchanged ...
    string[] malformed = ["cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"];
    foreach (text; malformed)
        assert(decode(text, DecodeMode.LOOSE) == text);

    // ... and strict mode must reject it.
    foreach (text; malformed)
        assertNot(text);
}
/**
 * Class representing an XML document.
 *
 * A Document is the root Element plus the text that surrounds it in the
 * source (prolog before, epilog after).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);

        // Everything in s that precedes the root tag's text is the prolog.
        immutable prologLength = rootTagText.ptr - s.ptr;
        prolog = s[0 .. prologLength];

        parse(parser);

        // Whatever the parser has left unconsumed becomes the epilog.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    /**
     * Compares two Documents for equality
     *
     * Prolog, element content and epilog must all match.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Document)(o);
        if (prolog != other.prolog)
            return false;
        if (!this.Element.opEquals(other))
            return false;
        return epilog == other.epilog;
    }

    /**
     * Compares two Documents
     *
     * The prolog is compared first, then the element content, then the
     * epilog.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const other = toType!(const Document)(o);

        if (prolog != other.prolog)
            return prolog < other.prolog ? -1 : 1;

        immutable int elementOrder = this.Element.opCmp(other);
        if (elementOrder != 0)
            return elementOrder;

        if (epilog != other.epilog)
            return epilog < other.epilog ? -1 : 1;

        return 0;
    }

    /**
     * Returns the hash of a Document
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override size_t toHash() scope const @trusted
    {
        // Chain the hashes: element content, then epilog, then prolog.
        immutable elementHash = (cast() this).Element.toHash();
        immutable withEpilog = hash(epilog, elementHash);
        return hash(prolog, withEpilog);
    }

    /**
     * Returns the string representation of a Document. (That is, the
     * complete XML of a document).
     */
    override string toString() scope const @safe
    {
        string xmlText = prolog;
        xmlText ~= super.toString();
        xmlText ~= epilog;
        return xmlText;
    }
}
@system unittest
{
    // Regression test for https://issues.dlang.org/show_bug.cgi?id=14966
    string source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    auto first = new Document(source);
    auto second = new Document(source);

    // Two documents parsed from the same text are equal and unordered ...
    assert(first == second);
    assert(!(first < second));

    // ... and interchangeable as associative array keys.
    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Adding a child makes the second document sort after the first.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
/**
 * Class representing an XML element.
 *
 * An element owns its start tag and an ordered list of child items. Each
 * child is also recorded in a per-kind list (texts, cdatas, comments, pis,
 * elements) by the matching `~=` overload.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag's name, attributes and tag string are copied into a new Tag.
     * The copy starts out as TagType.EMPTY and is switched to TagType.START
     * once a non-empty item is appended.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records item in the ordered child list and promotes an EMPTY tag to a
    // START tag as soon as the element holds something non-empty.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Populates this element from a parser: every text, CDATA, comment and
    // processing instruction is appended as the matching item type, and each
    // start tag produces a child Element that is parsed recursively.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements are equal when they have the same number of items and
     * every pair of corresponding items is equal.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        // NOTE(review): only the child items are compared here (and in
        // opCmp); the start tag is not, although toHash() mixes in
        // tag.toHash(). Elements differing only in their tag therefore
        // compare equal but may hash differently - confirm this is intended.
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared pairwise in order; the first unequal pair decides
     * the result. If one item list is a prefix of the other, the shorter
     * element orders first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        // size_t matches the type of .length, so the index cannot wrap
        // around before reaching the end of either list.
        for (size_t i = 0; ; ++i)
        {
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * The hash is the tag's hash plus the sum of the hashes of all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element
         *      contains an item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                // Keep the const qualifier: this method must not obtain a
                // mutable reference to one of its own items.
                const t = cast(const(Text)) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields its empty-tag form, and an element whose
         * only item is a Text is rendered on a single line. Otherwise the
         * start tag, the indented lines of every item and the end tag are
         * returned as separate strings.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Right-justifying to (length + indent) prepends
                    // `indent` spaces to the line.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        /// True when the element has no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
/**
 * The kinds of tag a Tag object can represent.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType
{
    START,
    END,
    EMPTY
}
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // source text of the tag when it was parsed

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // checkName advances its first argument, so validate a copy.
        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException carrying the text consumed so far if the input is
     * not a well-formed tag. This includes input that ends before a
     * delimiter is found, which previously produced an out-of-range slice.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.utf : byCodeUnit;

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // countUntil yields -1 when none of the delimiters occurs; that
            // means the tag is truncated, so report it rather than slice.
            ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
            if (i < 0) throw new TagException("");
            name = s[0 .. i];
            s = s[i .. $];
            skipWhite(s);

            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                // Attribute name: everything up to '=' or whitespace.
                i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
                if (i < 0) throw new TagException("");
                string key = s[0 .. i];
                s = s[i .. $];

                skipWhite(s);
                reqc(s,'=');
                skipWhite(s);

                // Attribute value: text between a matching pair of quotes.
                immutable char quote = requireOneOf(s,"'\"");
                i = s.byCodeUnit.countUntil(quote);
                if (i < 0) throw new TagException("");
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a valid tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the part of the input that this tag consumed.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    // Advances s past any leading ASCII whitespace; s may become empty.
    private static void skipWhite(ref string s) @safe pure
    {
        import std.ascii : isWhite;

        size_t n = 0;
        while (n < s.length && isWhite(s[n])) ++n;
        s = s[n .. $];
    }

    // Orders two attribute tables deterministically: first by their sorted
    // key lists, then by the values in sorted-key order. Returns -1, 0 or 1.
    private static int compareAttributes(const string[string] lhs,
                                         const string[string] rhs)
    {
        import std.algorithm.sorting : sort;

        static string[] sortedKeys(const string[string] aa)
        {
            string[] keys;
            foreach (key, value; aa)
                keys ~= key;
            sort(keys);
            return keys;
        }

        const lhsKeys = sortedKeys(lhs);
        const rhsKeys = sortedKeys(rhs);
        if (lhsKeys != rhsKeys)
            return lhsKeys < rhsKeys ? -1 : 1;

        foreach (key; lhsKeys)
        {
            const l = lhs[key];
            const r = rhs[key];
            if (l != r)
                return l < r ? -1 : 1;
        }
        return 0;
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two tags are equal when their name, attributes and type all match.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name, then by attributes (sorted keys first,
         * then the corresponding values), then by type. The attribute
         * ordering depends only on the attribute contents, not on where the
         * associative arrays happen to live in memory.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            if (name != tag.name) return name < tag.name ? -1 : 1;
            if (attr != tag.attr) return compareAttributes(attr, tag.attr);
            if (type != tag.type) return type < tag.type ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash, so tags that compare equal
         * always hash equally.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // "<name" followed by every attribute as key="encoded value",
            // without the closing bracket.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Note: a body that merely ends in "-" (other than "-" itself) is not
     * rejected here, although toString would then emit "--->".
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns false when the other item is not a Comment.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * Returns -1, 0 or 1 according to the ordering of the two bodies. When
     * the other item is not a Comment the result is 0, as no ordering is
     * defined between different item kinds.
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // Written as statements on purpose: `t !is null && (...)` is a bool
        // expression, which collapses the -1 / 1 result to 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" must be rejected.
    assertThrown!CommentException(new Comment("--"));
}
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns false when the other item is not a CData.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * Returns -1, 0 or 1 according to the ordering of the two bodies. When
     * the other item is not a CData the result is 0, as no ordering is
     * defined between different item kinds.
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // Written as statements on purpose: `t !is null && (...)` is a bool
        // expression, which collapses the -1 / 1 result to 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;     // stored already encoded

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns false when the other item is not a Text.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * Returns -1, 0 or 1 according to the ordering of the two (encoded)
     * texts. When the other item is not a Text the result is 0, as no
     * ordering is defined between different item kinds.
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // Written as statements on purpose: `t !is null && (...)` is a bool
        // expression, which collapses the -1 / 1 result to 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns false when the other item is not an XMLInstruction.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * Returns -1, 0 or 1 according to the ordering of the two bodies. When
     * the other item is not an XMLInstruction the result is 0, as no
     * ordering is defined between different item kinds.
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // Written as statements on purpose: `t !is null && (...)` is a bool
        // expression, which collapses the -1 / 1 result to 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns false when the other item is not a ProcessingInstruction.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * Returns -1, 0 or 1 according to the ordering of the two bodies. When
     * the other item is not a ProcessingInstruction the result is 0, as no
     * ordering is defined between different item kinds.
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // Written as statements on purpose: `t !is null && (...)` is a bool
        // expression, which collapses the -1 / 1 result to 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Tests this item for equality with another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Orders this item relative to another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Computes the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Produces the string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation yields the whitespace-stripped toString()
     * result as a single line, or no lines at all if that text is empty.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    /// The document text handed to the constructor
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);

        // Run the well-formedness check; a failure is reported through the
        // assertion message so the user can see why the input was rejected.
        try
        {
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        this.xmlText = xmlText_;
        this.s = &this.xmlText;     // must be in place before super() reads *s
        super();                    // Initialize everything
        parse();                    // Parse through the root tag (but not beyond)
    }
}
@system unittest
{
    // The root element has exactly one child element, <child>, and no other
    // items, so the item list and the element list coincide.
    auto document = new Document("<root><child><grandchild/></child></root>");

    assert(document.elements.length == 1);
    assert(document.elements[0].tag.name == "child");
    assert(document.items == document.elements);
}
1743 /**
1744  * Class for parsing an XML element.
1745  *
1746  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1747  *
1748  * Note that you cannot construct instances of this class directly. You can
1749  * construct a DocumentParser (which is a subclass of ElementParser), but
1750  * otherwise, Instances of ElementParser will be created for you by the
1751  * library, and passed your way via onStartTag handlers.
1752  *
1753  */
1754 class ElementParser
1755 {
1756     alias Handler = void delegate(string);
1757     alias ElementHandler = void delegate(in Element element);
1758     alias ParserHandler = void delegate(ElementParser parser);
1760     private
1761     {
1762         Tag tag_;
1763         string elementStart;
1764         string* s;
1766         Handler commentHandler = null;
1767         Handler cdataHandler = null;
1768         Handler xiHandler = null;
1769         Handler piHandler = null;
1770         Handler rawTextHandler = null;
1771         Handler textHandler = null;
1773         // Private constructor for start tags
1774         this(ElementParser parent) @safe @nogc pure nothrow
1775         {
1776             s = parent.s;
1777             this();
1778             tag_ = parent.tag_;
1779         }
1781         // Private constructor for empty tags
1782         this(Tag tag, string* t) @safe @nogc pure nothrow
1783         {
1784             s = t;
1785             this();
1786             tag_ = tag;
1787         }
1788     }
1790     /**
1791      * The Tag at the start of the element being parsed. You can read this to
1792      * determine the tag's name and attributes.
1793      */
1794     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1796     /**
1797      * Register a handler which will be called whenever a start tag is
1798      * encountered which matches the specified name. You can also pass null as
1799      * the name, in which case the handler will be called for any unmatched
1800      * start tag.
1801      *
1802      * Example:
1803      * --------------
1804      * // Call this function whenever a <podcast> start tag is encountered
1805      * onStartTag["podcast"] = (ElementParser xml)
1806      * {
1807      *     // Your code here
1808      *     //
     *     // This is a closure, so code here may reference
1810      *     // variables which are outside of this scope
1811      * };
1812      *
1813      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1814      * // start tag is encountered
1815      * onStartTag["episode"] = &myEpisodeStartHandler;
1816      *
1817      * // call delegate dg for all other start tags
1818      * onStartTag[null] = dg;
1819      * --------------
1820      *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
1825      *
1826      * Note that your function will be called for both start tags and empty
1827      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1828      * and &lt;br/&gt;.
1829      */
1830     ParserHandler[string] onStartTag;
1832     /**
1833      * Register a handler which will be called whenever an end tag is
1834      * encountered which matches the specified name. You can also pass null as
1835      * the name, in which case the handler will be called for any unmatched
1836      * end tag.
1837      *
1838      * Example:
1839      * --------------
1840      * // Call this function whenever a </podcast> end tag is encountered
1841      * onEndTag["podcast"] = (in Element e)
1842      * {
1843      *     // Your code here
1844      *     //
     *     // This is a closure, so code here may reference
1846      *     // variables which are outside of this scope
1847      * };
1848      *
1849      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1850      * // end tag is encountered
1851      * onEndTag["episode"] = &myEpisodeEndHandler;
1852      *
1853      * // call delegate dg for all other end tags
1854      * onEndTag[null] = dg;
1855      * --------------
1856      *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
     * and &lt;br/&gt;.
1860      */
1861     ElementHandler[string] onEndTag;
1863     protected this() @safe @nogc pure nothrow
1864     {
1865         elementStart = *s;
1866     }
1868     /**
1869      * Register a handler which will be called whenever text is encountered.
1870      *
1871      * Example:
1872      * --------------
1873      * // Call this function whenever text is encountered
1874      * onText = (string s)
1875      * {
1876      *     // Your code here
1877      *
1878      *     // The passed parameter s will have been decoded by the time you see
1879      *     // it, and so may contain any character.
1880      *     //
1881      *     // This is a a closure, so code here may reference
1882      *     // variables which are outside of this scope
1883      * };
1884      * --------------
1885      */
1886     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1888     /**
1889      * Register an alternative handler which will be called whenever text
1890      * is encountered. This differs from onText in that onText will decode
1891      * the text, whereas onTextRaw will not. This allows you to make design
1892      * choices, since onText will be more accurate, but slower, while
1893      * onTextRaw will be faster, but less accurate. Of course, you can
1894      * still call decode() within your handler, if you want, but you'd
1895      * probably want to use onTextRaw only in circumstances where you
1896      * know that decoding is unnecessary.
1897      *
1898      * Example:
1899      * --------------
1900      * // Call this function whenever text is encountered
1901      * onText = (string s)
1902      * {
1903      *     // Your code here
1904      *
1905      *     // The passed parameter s will NOT have been decoded.
1906      *     //
1907      *     // This is a a closure, so code here may reference
1908      *     // variables which are outside of this scope
1909      * };
1910      * --------------
1911      */
1912     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1914     /**
1915      * Register a handler which will be called whenever a character data
1916      * segment is encountered.
1917      *
1918      * Example:
1919      * --------------
1920      * // Call this function whenever a CData section is encountered
1921      * onCData = (string s)
1922      * {
1923      *     // Your code here
1924      *
1925      *     // The passed parameter s does not include the opening <![CDATA[
1926      *     // nor closing ]]>
1927      *     //
1928      *     // This is a a closure, so code here may reference
1929      *     // variables which are outside of this scope
1930      * };
1931      * --------------
1932      */
1933     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1935     /**
1936      * Register a handler which will be called whenever a comment is
1937      * encountered.
1938      *
1939      * Example:
1940      * --------------
1941      * // Call this function whenever a comment is encountered
1942      * onComment = (string s)
1943      * {
1944      *     // Your code here
1945      *
1946      *     // The passed parameter s does not include the opening <!-- nor
1947      *     // closing -->
1948      *     //
1949      *     // This is a a closure, so code here may reference
1950      *     // variables which are outside of this scope
1951      * };
1952      * --------------
1953      */
1954     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1956     /**
1957      * Register a handler which will be called whenever a processing
1958      * instruction is encountered.
1959      *
1960      * Example:
1961      * --------------
1962      * // Call this function whenever a processing instruction is encountered
1963      * onPI = (string s)
1964      * {
1965      *     // Your code here
1966      *
1967      *     // The passed parameter s does not include the opening <? nor
1968      *     // closing ?>
1969      *     //
     *     // This is a closure, so code here may reference
1971      *     // variables which are outside of this scope
1972      * };
1973      * --------------
1974      */
1975     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1977     /**
1978      * Register a handler which will be called whenever an XML instruction is
1979      * encountered.
1980      *
1981      * Example:
1982      * --------------
1983      * // Call this function whenever an XML instruction is encountered
1984      * // (Note: XML instructions may only occur preceding the root tag of a
1985      * // document).
     * onXI = (string s)
1987      * {
1988      *     // Your code here
1989      *
1990      *     // The passed parameter s does not include the opening <! nor
1991      *     // closing >
1992      *     //
     *     // This is a closure, so code here may reference
1994      *     // variables which are outside of this scope
1995      * };
1996      * --------------
1997      */
1998     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
2000     /**
2001      * Parse an XML element.
2002      *
2003      * Parsing will continue until the end of the current element. Any items
2004      * encountered for which a handler has been registered will invoke that
2005      * handler.
2006      *
2007      * Throws: various kinds of XMLException
2008      */
2009     void parse()
2010     {
2011         import std.algorithm.searching : startsWith;
2012         import std.string : indexOf;
2014         string t;
2015         const Tag root = tag_;
2016         Tag[string] startTags;
2017         if (tag_ !is null) startTags[tag_.name] = tag_;
2019         while (s.length != 0)
2020         {
2021             if (startsWith(*s,"<!--"))
2022             {
2023                 chop(*s,4);
2024                 t = chop(*s,indexOf(*s,"-->"));
2025                 if (commentHandler.funcptr !is null) commentHandler(t);
2026                 chop(*s,3);
2027             }
2028             else if (startsWith(*s,"<![CDATA["))
2029             {
2030                 chop(*s,9);
2031                 t = chop(*s,indexOf(*s,"]]>"));
2032                 if (cdataHandler.funcptr !is null) cdataHandler(t);
2033                 chop(*s,3);
2034             }
2035             else if (startsWith(*s,"<!"))
2036             {
2037                 chop(*s,2);
2038                 t = chop(*s,indexOf(*s,">"));
2039                 if (xiHandler.funcptr !is null) xiHandler(t);
2040                 chop(*s,1);
2041             }
2042             else if (startsWith(*s,"<?"))
2043             {
2044                 chop(*s,2);
2045                 t = chop(*s,indexOf(*s,"?>"));
2046                 if (piHandler.funcptr !is null) piHandler(t);
2047                 chop(*s,2);
2048             }
2049             else if (startsWith(*s,"<"))
2050             {
2051                 tag_ = new Tag(*s,true);
2052                 if (root is null)
2053                     return; // Return to constructor of derived class
2055                 if (tag_.isStart)
2056                 {
2057                     startTags[tag_.name] = tag_;
2059                     auto parser = new ElementParser(this);
2061                     auto handler = tag_.name in onStartTag;
2062                     if (handler !is null) (*handler)(parser);
2063                     else
2064                     {
2065                         handler = null in onStartTag;
2066                         if (handler !is null) (*handler)(parser);
2067                     }
2068                 }
2069                 else if (tag_.isEnd)
2070                 {
2071                     const startTag = startTags[tag_.name];
2072                     string text;
2074                     if (startTag.tagString.length == 0)
2075                         assert(0);
2077                     immutable(char)* p = startTag.tagString.ptr
2078                         + startTag.tagString.length;
2079                     immutable(char)* q = &tag_.tagString[0];
2080                     text = decode(p[0..(q-p)], DecodeMode.LOOSE);
2082                     auto element = new Element(startTag);
2083                     if (text.length != 0) element ~= new Text(text);
2085                     auto handler = tag_.name in onEndTag;
2086                     if (handler !is null) (*handler)(element);
2087                     else
2088                     {
2089                         handler = null in onEndTag;
2090                         if (handler !is null) (*handler)(element);
2091                     }
2093                     if (tag_.name == root.name) return;
2094                 }
2095                 else if (tag_.isEmpty)
2096                 {
2097                     Tag startTag = new Tag(tag_.name);
2099                     // FIX by hed010gy, for bug 2979
2100                     // http://d.puremagic.com/issues/show_bug.cgi?id=2979
2101                     if (tag_.attr.length > 0)
2102                           foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2103                     // END FIX
2105                     // Handle the pretend start tag
2106                     string s2;
2107                     auto parser = new ElementParser(startTag,&s2);
2108                     auto handler1 = startTag.name in onStartTag;
2109                     if (handler1 !is null) (*handler1)(parser);
2110                     else
2111                     {
2112                         handler1 = null in onStartTag;
2113                         if (handler1 !is null) (*handler1)(parser);
2114                     }
2116                     // Handle the pretend end tag
2117                     auto element = new Element(startTag);
2118                     auto handler2 = tag_.name in onEndTag;
2119                     if (handler2 !is null) (*handler2)(element);
2120                     else
2121                     {
2122                         handler2 = null in onEndTag;
2123                         if (handler2 !is null) (*handler2)(element);
2124                     }
2125                 }
2126             }
2127             else
2128             {
2129                 t = chop(*s,indexOf(*s,"<"));
2130                 if (rawTextHandler.funcptr !is null)
2131                     rawTextHandler(t);
2132                 else if (textHandler.funcptr !is null)
2133                     textHandler(decode(t,DecodeMode.LOOSE));
2134             }
2135         }
2136     }
2138     /**
2139      * Returns that part of the element which has already been parsed
2140      */
2141     override string toString() const @nogc @safe pure nothrow
2142     {
2143         assert(elementStart.length >= s.length);
2144         return elementStart[0 .. elementStart.length - s.length];
2145     }
2147 }
2149 private
2150 {
2151     template Check(string msg)
2152     {
2153         string old = s;
2155         void fail() @safe pure
2156         {
2157             s = old;
2158             throw new Err(s,msg);
2159         }
2161         void fail(Err e) @safe pure
2162         {
2163             s = old;
2164             throw new Err(s,msg,e);
2165         }
2167         void fail(string msg2) @safe pure
2168         {
2169             fail(new Err(s,msg2));
2170         }
2171     }
2173     void checkMisc(ref string s) @safe pure // rule 27
2174     {
2175         import std.algorithm.searching : startsWith;
2177         mixin Check!("Misc");
2179         try
2180         {
2181                  if (s.startsWith("<!--")) { checkComment(s); }
2182             else if (s.startsWith("<?"))   { checkPI(s); }
2183             else                           { checkSpace(s); }
2184         }
2185         catch (Err e) { fail(e); }
2186     }
2188     void checkDocument(ref string s) @safe pure // rule 1
2189     {
2190         mixin Check!("Document");
2191         try
2192         {
2193             checkProlog(s);
2194             checkElement(s);
2195             star!(checkMisc)(s);
2196         }
2197         catch (Err e) { fail(e); }
2198     }
2200     void checkChars(ref string s) @safe pure // rule 2
2201     {
2202         // TO DO - Fix std.utf stride and decode functions, then use those
2203         // instead
2204         import std.format : format;
2206         mixin Check!("Chars");
2208         dchar c;
2209         ptrdiff_t n = -1;
2210         // 'i' must not be smaller than size_t because size_t is used internally in
2211         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2212         foreach (size_t i, dchar d; s)
2213         {
2214             if (!isChar(d))
2215             {
2216                 c = d;
2217                 n = i;
2218                 break;
2219             }
2220         }
2221         if (n != -1)
2222         {
2223             s = s[n..$];
2224             fail(format("invalid character: U+%04X",c));
2225         }
2226     }
2228     void checkSpace(ref string s) @safe pure // rule 3
2229     {
2230         import std.algorithm.searching : countUntil;
2231         import std.ascii : isWhite;
2232         import std.utf : byCodeUnit;
2234         mixin Check!("Whitespace");
2235         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2236         if (i == -1 && s.length > 0 && isWhite(s[0]))
2237             s = s[$ .. $];
2238         else if (i > -1)
2239             s = s[i .. $];
2240         if (s is old) fail();
2241     }
2243     void checkName(ref string s, out string name) @safe pure // rule 5
2244     {
2245         mixin Check!("Name");
2247         if (s.length == 0) fail();
2248         ptrdiff_t n;
2249         // 'i' must not be smaller than size_t because size_t is used internally in
2250         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2251         foreach (size_t i, dchar c; s)
2252         {
2253             if (c == '_' || c == ':' || isLetter(c)) continue;
2254             if (i == 0) fail();
2255             if (c == '-' || c == '.' || isDigit(c)
2256                 || isCombiningChar(c) || isExtender(c)) continue;
2257             n = i;
2258             break;
2259         }
2260         name = s[0 .. n];
2261         s = s[n..$];
2262     }
2264     void checkAttValue(ref string s) @safe pure // rule 10
2265     {
2266         import std.algorithm.searching : countUntil;
2267         import std.utf : byCodeUnit;
2269         mixin Check!("AttValue");
2271         if (s.length == 0) fail();
2272         char c = s[0];
2273         if (c != '\u0022' && c != '\u0027')
2274             fail("attribute value requires quotes");
2275         s = s[1..$];
2276         for (;;)
2277         {
2278             s = s[s.byCodeUnit.countUntil(c) .. $];
2279             if (s.length == 0) fail("unterminated attribute value");
2280             if (s[0] == '<') fail("< found in attribute value");
2281             if (s[0] == c) break;
2282             try { checkReference(s); } catch (Err e) { fail(e); }
2283         }
2284         s = s[1..$];
2285     }
2287     void checkCharData(ref string s) @safe pure // rule 14
2288     {
2289         import std.algorithm.searching : startsWith;
2291         mixin Check!("CharData");
2293         while (s.length != 0)
2294         {
2295             if (s.startsWith("&")) break;
2296             if (s.startsWith("<")) break;
2297             if (s.startsWith("]]>")) fail("]]> found within char data");
2298             s = s[1..$];
2299         }
2300     }
2302     void checkComment(ref string s) @safe pure // rule 15
2303     {
2304         import std.string : indexOf;
2306         mixin Check!("Comment");
2308         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2309         ptrdiff_t n = s.indexOf("--");
2310         if (n == -1) fail("unterminated comment");
2311         s = s[n..$];
2312         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2313     }
2315     void checkPI(ref string s) @safe pure // rule 16
2316     {
2317         mixin Check!("PI");
2319         try
2320         {
2321             checkLiteral("<?",s);
2322             checkEnd("?>",s);
2323         }
2324         catch (Err e) { fail(e); }
2325     }
2327     void checkCDSect(ref string s) @safe pure // rule 18
2328     {
2329         mixin Check!("CDSect");
2331         try
2332         {
2333             checkLiteral(cdata,s);
2334             checkEnd("]]>",s);
2335         }
2336         catch (Err e) { fail(e); }
2337     }
2339     void checkProlog(ref string s) @safe pure // rule 22
2340     {
2341         mixin Check!("Prolog");
2343         try
2344         {
2345             /* The XML declaration is optional
2346              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2347              */
2348             opt!(checkXMLDecl)(s);
2350             star!(checkMisc)(s);
2351             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2352         }
2353         catch (Err e) { fail(e); }
2354     }
2356     void checkXMLDecl(ref string s) @safe pure // rule 23
2357     {
2358         mixin Check!("XMLDecl");
2360         try
2361         {
2362             checkLiteral("<?xml",s);
2363             checkVersionInfo(s);
2364             opt!(checkEncodingDecl)(s);
2365             opt!(checkSDDecl)(s);
2366             opt!(checkSpace)(s);
2367             checkLiteral("?>",s);
2368         }
2369         catch (Err e) { fail(e); }
2370     }
2372     void checkVersionInfo(ref string s) @safe pure // rule 24
2373     {
2374         mixin Check!("VersionInfo");
2376         try
2377         {
2378             checkSpace(s);
2379             checkLiteral("version",s);
2380             checkEq(s);
2381             quoted!(checkVersionNum)(s);
2382         }
2383         catch (Err e) { fail(e); }
2384     }
2386     void checkEq(ref string s) @safe pure // rule 25
2387     {
2388         mixin Check!("Eq");
2390         try
2391         {
2392             opt!(checkSpace)(s);
2393             checkLiteral("=",s);
2394             opt!(checkSpace)(s);
2395         }
2396         catch (Err e) { fail(e); }
2397     }
2399     void checkVersionNum(ref string s) @safe pure // rule 26
2400     {
2401         import std.algorithm.searching : countUntil;
2402         import std.utf : byCodeUnit;
2404         mixin Check!("VersionNum");
2406         s = s[s.byCodeUnit.countUntil('\"') .. $];
2407         if (s is old) fail();
2408     }
2410     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2411     {
2412         mixin Check!("DocTypeDecl");
2414         try
2415         {
2416             checkLiteral("<!DOCTYPE",s);
2417             //
2418             // TO DO -- ensure DOCTYPE is well formed
2419             // (But not yet. That's one of our "future directions")
2420             //
2421             checkEnd(">",s);
2422         }
2423         catch (Err e) { fail(e); }
2424     }
2426     void checkSDDecl(ref string s) @safe pure // rule 32
2427     {
2428         import std.algorithm.searching : startsWith;
2430         mixin Check!("SDDecl");
2432         try
2433         {
2434             checkSpace(s);
2435             checkLiteral("standalone",s);
2436             checkEq(s);
2437         }
2438         catch (Err e) { fail(e); }
2440         int n = 0;
2441              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2442         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2443         else fail("standalone attribute value must be 'yes', \"yes\","~
2444             " 'no' or \"no\"");
2445         s = s[n..$];
2446     }
2448     void checkElement(ref string s) @safe pure // rule 39
2449     {
2450         mixin Check!("Element");
2452         string sname,ename,t;
2453         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2455         if (t == "STag")
2456         {
2457             try
2458             {
2459                 checkContent(s);
2460                 t = s;
2461                 checkETag(s,ename);
2462             }
2463             catch (Err e) { fail(e); }
2465             if (sname != ename)
2466             {
2467                 s = t;
2468                 fail("end tag name \"" ~ ename
2469                     ~ "\" differs from start tag name \""~sname~"\"");
2470             }
2471         }
2472     }
2474     // rules 40 and 44
2475     void checkTag(ref string s, out string type, out string name) @safe pure
2476     {
2477         mixin Check!("Tag");
2479         try
2480         {
2481             type = "STag";
2482             checkLiteral("<",s);
2483             checkName(s,name);
2484             star!(seq!(checkSpace,checkAttribute))(s);
2485             opt!(checkSpace)(s);
2486             if (s.length != 0 && s[0] == '/')
2487             {
2488                 s = s[1..$];
2489                 type = "ETag";
2490             }
2491             checkLiteral(">",s);
2492         }
2493         catch (Err e) { fail(e); }
2494     }
2496     void checkAttribute(ref string s) @safe pure // rule 41
2497     {
2498         mixin Check!("Attribute");
2500         try
2501         {
2502             string name;
2503             checkName(s,name);
2504             checkEq(s);
2505             checkAttValue(s);
2506         }
2507         catch (Err e) { fail(e); }
2508     }
2510     void checkETag(ref string s, out string name) @safe pure // rule 42
2511     {
2512         mixin Check!("ETag");
2514         try
2515         {
2516             checkLiteral("</",s);
2517             checkName(s,name);
2518             opt!(checkSpace)(s);
2519             checkLiteral(">",s);
2520         }
2521         catch (Err e) { fail(e); }
2522     }
2524     void checkContent(ref string s) @safe pure // rule 43
2525     {
2526         import std.algorithm.searching : startsWith;
2528         mixin Check!("Content");
2530         try
2531         {
2532             while (s.length != 0)
2533             {
2534                 old = s;
2535                      if (s.startsWith("&"))        { checkReference(s); }
2536                 else if (s.startsWith("<!--"))     { checkComment(s); }
2537                 else if (s.startsWith("<?"))       { checkPI(s); }
2538                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2539                 else if (s.startsWith("</"))       { break; }
2540                 else if (s.startsWith("<"))        { checkElement(s); }
2541                 else                               { checkCharData(s); }
2542             }
2543         }
2544         catch (Err e) { fail(e); }
2545     }
2547     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2548     {
2549         import std.format : format;
2551         mixin Check!("CharRef");
2553         c = 0;
2554         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2555         int radix = 10;
2556         if (s.length != 0 && s[0] == 'x')
2557         {
2558             s = s[1..$];
2559             radix = 16;
2560         }
2561         if (s.length == 0) fail("unterminated character reference");
2562         if (s[0] == ';')
2563             fail("character reference must have at least one digit");
2564         while (s.length != 0)
2565         {
2566             immutable char d = s[0];
2567             int n = 0;
2568             switch (d)
2569             {
2570                 case 'F','f': ++n;      goto case;
2571                 case 'E','e': ++n;      goto case;
2572                 case 'D','d': ++n;      goto case;
2573                 case 'C','c': ++n;      goto case;
2574                 case 'B','b': ++n;      goto case;
2575                 case 'A','a': ++n;      goto case;
2576                 case '9':     ++n;      goto case;
2577                 case '8':     ++n;      goto case;
2578                 case '7':     ++n;      goto case;
2579                 case '6':     ++n;      goto case;
2580                 case '5':     ++n;      goto case;
2581                 case '4':     ++n;      goto case;
2582                 case '3':     ++n;      goto case;
2583                 case '2':     ++n;      goto case;
2584                 case '1':     ++n;      goto case;
2585                 case '0':     break;
2586                 default: n = 100; break;
2587             }
2588             if (n >= radix) break;
2589             c *= radix;
2590             c += n;
2591             s = s[1..$];
2592         }
2593         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2594         if (s.length == 0 || s[0] != ';') fail("expected ;");
2595         else s = s[1..$];
2596     }
2598     void checkReference(ref string s) @safe pure // rule 67
2599     {
2600         import std.algorithm.searching : startsWith;
2602         mixin Check!("Reference");
2604         try
2605         {
2606             dchar c;
2607             if (s.startsWith("&#")) checkCharRef(s,c);
2608             else checkEntityRef(s);
2609         }
2610         catch (Err e) { fail(e); }
2611     }
2613     void checkEntityRef(ref string s) @safe pure // rule 68
2614     {
2615         mixin Check!("EntityRef");
2617         try
2618         {
2619             string name;
2620             checkLiteral("&",s);
2621             checkName(s,name);
2622             checkLiteral(";",s);
2623         }
2624         catch (Err e) { fail(e); }
2625     }
2627     void checkEncName(ref string s) @safe pure // rule 81
2628     {
2629         import std.algorithm.searching : countUntil;
2630         import std.ascii : isAlpha;
2631         import std.utf : byCodeUnit;
2633         mixin Check!("EncName");
2635         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2636         if (s is old) fail();
2637         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2638     }
2640     void checkEncodingDecl(ref string s) @safe pure // rule 80
2641     {
2642         mixin Check!("EncodingDecl");
2644         try
2645         {
2646             checkSpace(s);
2647             checkLiteral("encoding",s);
2648             checkEq(s);
2649             quoted!(checkEncName)(s);
2650         }
2651         catch (Err e) { fail(e); }
2652     }
2654     // Helper functions
2656     void checkLiteral(string literal,ref string s) @safe pure
2657     {
2658         import std.string : startsWith;
2660         mixin Check!("Literal");
2662         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2663         s = s[literal.length..$];
2664     }
2666     void checkEnd(string end,ref string s) @safe pure
2667     {
2668         import std.string : indexOf;
2669         // Deliberately no mixin Check here.
2671         auto n = s.indexOf(end);
2672         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2673         s = s[n..$];
2674         checkLiteral(end,s);
2675     }
2677     // Metafunctions -- none of these use mixin Check
2679     void opt(alias f)(ref string s)
2680     {
2681         try { f(s); } catch (Err e) {}
2682     }
2684     void plus(alias f)(ref string s)
2685     {
2686         f(s);
2687         star!(f)(s);
2688     }
2690     void star(alias f)(ref string s)
2691     {
2692         while (s.length != 0)
2693         {
2694             try { f(s); }
2695             catch (Err e) { return; }
2696         }
2697     }
2699     void quoted(alias f)(ref string s)
2700     {
2701         import std.string : startsWith;
2703         if (s.startsWith("'"))
2704         {
2705             checkLiteral("'",s);
2706             f(s);
2707             checkLiteral("'",s);
2708         }
2709         else
2710         {
2711             checkLiteral("\"",s);
2712             f(s);
2713             checkLiteral("\"",s);
2714         }
2715     }
2717     void seq(alias f,alias g)(ref string s)
2718     {
2719         f(s);
2720         g(s);
2721     }
2722 }
2724 /**
2725  * Check an entire XML document for well-formedness
2726  *
2727  * Params:
2728  *      s = the document to be checked, passed as a string
2729  *
2730  * Throws: CheckException if the document is not well formed
2731  *
2732  * CheckException's toString() method will yield the complete hierarchy of
2733  * parse failure (the XML equivalent of a stack trace), giving the line and
2734  * column number of every failure at every level.
2735  */
void check(string s) @safe pure
{
    // The checkers advance `s` as they consume input, so keep the complete
    // document for computing line and column numbers. Previously the
    // advanced `s` was used, which made "Junk found after document" always
    // report line 1, column 1.
    immutable string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        // Fill in line/column for every error in the chain, then rethrow.
        e.complete(entire);
        throw e;
    }
}
@system pure unittest
{
    import std.string : indexOf;

    // The second book closes <genre> with </genres>; check() must reject the
    // document and name both tags in its report.
    bool rejected = false;
    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
    }
    catch (CheckException e)
    {
        rejected = true;
        immutable found = e.toString().indexOf("end tag name \"genres\" differs"~
                                               " from start tag name \"genre\"");
        assert(found != -1);
    }
    assert(rejected);
}
@system unittest
{
    // A comment between two child elements must not make check() fail.
    string document = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(document);
    }
    catch (CheckException failure)
    {
        assert(0, failure.toString());
    }
}
@system unittest
{
    // Attribute values may contain the other kind of quote character.
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    auto parser = new DocumentParser(test_xml);
    bool handlerRan = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        auto attributes = p.tag.attr;
        assert(attributes["xmlns"] == "jabber:'client'");
        assert(attributes["from"] == "jid.pl");
        assert(attributes["attr"] == "a\"b\"c");
        handlerRan = true;
    };
    parser.parse();

    // Make sure the assertions above were actually executed.
    assert(handlerRan);
}
@system unittest
{
    // &amp; must be decoded both in attribute values and in element text.
    string source = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto doc = new DocumentParser(source);

    doc.onStartTag["Test"] = (ElementParser child) {
        assert(child.tag.attr["thing"] == "What & Up");
    };

    doc.onEndTag["Test"] = (in Element element) {
        assert(element.text() == "What & Up Second");
    };
    doc.parse();
}
@system unittest
{
    // Entities in an attribute value must survive a Document round trip.
    string source = `<tag attr="&quot;value&gt;" />`;
    auto document = new Document(source);
    immutable rendered = document.toString();
    assert(rendered == source);
}
2866 /** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    // Passes the message straight through to Exception.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2869 // Other exceptions
2871 /// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2875 /// Thrown during CData constructor
class CDataException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2879 /// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2883 /// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2887 /// Thrown during Text constructor
class TextException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2891 /// Thrown during decode()
class DecodeException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2895 /// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2899 /// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private: can only be constructed from within this module.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2903 /**
2904  * Thrown during check()
2905  */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // input that was still unparsed at the failure
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    // Records where parsing stopped (`tail`), what failed (`msg`) and,
    // optionally, the chained error `err`. Line and column stay 0 until
    // complete() is called.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Computes line and column for this error and for every chained error,
    // given the complete document text.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure position.
        string consumed = entire[0 .. $ - tail.length];

        // Lines are counted by newlines; the column is counted in code points
        // from the start of the last line.
        immutable ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;
        line = consumed.count("\n") + 1;
        column = toUTF32(consumed[lineStart .. $]).length + 1;

        if (err !is null)
            err.complete(entire);
    }

    // One line per error in the chain, with the chained error's text placed
    // before this one's.
    override string toString() const @safe pure
    {
        import std.format : format;

        string own = line != 0
            ? format("Line %d, column %d: ",line,column)
            : null;
        own ~= msg;
        own ~= '\n';

        return err is null ? own : err.toString() ~ own;
    }
}
// Short module-private name for CheckException, used throughout the checkers.
private alias Err = CheckException;
2954 // Private helper functions
2956 private
2957 {
2958     inout(T) toType(T)(inout Object o)
2959     {
2960         T t = cast(T)(o);
2961         if (t is null)
2962         {
2963             throw new InvalidTypeException("Attempt to compare a "
2964                 ~ T.stringof ~ " with an instance of another type");
2965         }
2966         return t;
2967     }
2969     string chop(ref string s, size_t n) @safe pure nothrow
2970     {
2971         if (n == -1) n = s.length;
2972         string t = s[0 .. n];
2973         s = s[n..$];
2974         return t;
2975     }
2977     bool optc(ref string s, char c) @safe pure nothrow
2978     {
2979         immutable bool b = s.length != 0 && s[0] == c;
2980         if (b) s = s[1..$];
2981         return b;
2982     }
2984     void reqc(ref string s, char c) @safe pure
2985     {
2986         if (s.length == 0 || s[0] != c) throw new TagException("");
2987         s = s[1..$];
2988     }
2990     char requireOneOf(ref string s, string chars) @safe pure
2991     {
2992         import std.string : indexOf;
2994         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2995             throw new TagException("");
2996         immutable char ch = s[0];
2997         s = s[1..$];
2998         return ch;
2999     }
    // `hash` is a short name for `hashOf`; the leading dot makes the lookup
    // start at module scope instead of the enclosing scope.
    alias hash = .hashOf;
3003     // Definitions from the XML specification
3004     immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
3005         0x10000,0x10FFFF];
3006     immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3007         0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3008         0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3009         0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3010         0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3011         0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3012         0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3013         0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3014         0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3015         0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3016         0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3017         0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3018         0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3019         0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3020         0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3021         0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3022         0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3023         0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3024         0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3025         0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3026         0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3027         0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3028         0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3029         0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3030         0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3031         0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3032         0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3033         0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3034         0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3035         0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3036         0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3037         0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3038         0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3039         0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3040         0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3041         0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3042         0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3043         0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3044         0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3045         0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3046         0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3047     immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3048     immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3049         0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3050         0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3051         0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3052         0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3053         0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3054         0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3055         0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3056         0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3057         0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3058         0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3059         0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3060         0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3061         0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3062         0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3063         0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3064         0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3065         0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3066         0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3067         0x3099,0x3099,0x309A,0x309A];
3068     immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3069         0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3070         0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3071         0x0ED9,0x0F20,0x0F29];
3072     immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3073         0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3074         0x3035,0x309D,0x309E,0x30FC,0x30FE];
3076     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3077     {
3078         while (table.length != 0)
3079         {
3080             auto m = (table.length >> 1) & ~1;
3081             if (c < table[m])
3082             {
3083                 table = table[0 .. m];
3084             }
3085             else if (c > table[m+1])
3086             {
3087                 table = table[m+2..$];
3088             }
3089             else return true;
3090         }
3091         return false;
3092     }
3094     string startOf(string s) @safe nothrow pure
3095     {
3096         string r;
3097         foreach (char c;s)
3098         {
3099             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3100             if (r.length >= 40) { r ~= "___"; break; }
3101         }
3102         return r;
3103     }
3105     void exit(string s=null)
3106     {
3107         throw new XMLException(s);
3108     }
3109 }