1 // Written in the D programming language. 2 3 /** 4 $(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will remain until we have a suitable replacement, 6 but be aware that it will not remain long term.) 7 8 Classes and functions for creating and parsing XML 9 10 The basic architecture of this module is that there are standalone functions, 11 classes for constructing an XML document from scratch (Tag, Element and 12 Document), and also classes for parsing a pre-existing XML file (ElementParser 13 and DocumentParser). The parsing classes <i>may</i> be used to build a 14 Document, but that is not their primary purpose. The handling capabilities of 15 DocumentParser and ElementParser are sufficiently customizable that you can 16 make them do pretty much whatever you want. 17 18 Example: This example creates a DOM (Document Object Model) tree 19 from an XML file. 20 ------------------------------------------------------------------------------ 21 import undead.xml; 22 import std.stdio; 23 import std.string; 24 import std.file; 25 26 // books.xml is used in various samples throughout the Microsoft XML Core 27 // Services (MSXML) SDK. 28 // 29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 30 31 void main() 32 { 33 string s = cast(string) std.file.read("books.xml"); 34 35 // Check for well-formedness 36 check(s); 37 38 // Make a DOM tree 39 auto doc = new Document(s); 40 41 // Plain-print it 42 writeln(doc); 43 } 44 ------------------------------------------------------------------------------ 45 46 Example: This example does much the same thing, except that the file is 47 deconstructed and reconstructed by hand. This is more work, but the 48 techniques involved offer vastly more power. 
------------------------------------------------------------------------------
import undead.xml;
import std.stdio;
import std.string;
import std.file;

struct Book
{
    string id;
    string author;
    string title;
    string genre;
    string price;
    string pubDate;
    string description;
}

void main()
{
    string s = cast(string) std.file.read("books.xml");

    // Check for well-formedness
    check(s);

    // Take it apart
    Book[] books;

    auto xml = new DocumentParser(s);
    xml.onStartTag["book"] = (ElementParser xml)
    {
        Book book;
        book.id = xml.tag.attr["id"];

        xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
        xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
        xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
        xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
        xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
        xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };

        xml.parse();

        books ~= book;
    };
    xml.parse();

    // Put it back together again;
    auto doc = new Document(new Tag("catalog"));
    foreach (book;books)
    {
        auto element = new Element("book");
        element.tag.attr["id"] = book.id;

        element ~= new Element("author",      book.author);
        element ~= new Element("title",       book.title);
        element ~= new Element("genre",       book.genre);
        element ~= new Element("price",       book.price);
        element ~= new Element("publish-date",book.pubDate);
        element ~= new Element("description", book.description);

        doc ~= element;
    }

    // Pretty-print it (writeln, so that '%' in the document is not treated
    // as a format specifier)
    writeln(join(doc.pretty(3),"\n"));
}
-------------------------------------------------------------------------------
Copyright: Copyright Janice Caron 2008 - 2009.
License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors:   Janice Caron
Source:    $(PHOBOSSRC std/xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.xml;

enum cdata = "<![CDATA[";

/**
 * Tests whether a code point is a legal XML character (rule 2 of the
 * XML standard).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true for tab, line feed, carriage return, U+0020 .. U+D7FF and
 *          U+E000 .. U+10FFFF except U+FFFE and U+FFFF; false otherwise.
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Up to the last code point before the surrogates: anything from space
    // upwards, plus the three permitted control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // Surrogates and everything beyond U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Clearing the lowest bit folds U+FFFF onto U+FFFE, so a single
    // comparison rejects exactly those two code points.
    return (c & 0x1FFFFE) != 0xFFFE;
}

@safe @nogc nothrow pure unittest
{
    // Control characters: only tab, LF and CR are legal
    assert(!isChar(cast(dchar) 0x8));
    assert( isChar(cast(dchar) 0x9));
    assert( isChar(cast(dchar) 0xA));
    assert(!isChar(cast(dchar) 0xB));
    assert(!isChar(cast(dchar) 0xC));
    assert( isChar(cast(dchar) 0xD));
    assert(!isChar(cast(dchar) 0xE));
    assert(!isChar(cast(dchar) 0x1F));
    assert( isChar(cast(dchar) 0x20));
    assert( isChar('J'));

    // Edges of the surrogate block
    assert( isChar(cast(dchar) 0xD7FF));
    assert(!isChar(cast(dchar) 0xD800));
    assert(!isChar(cast(dchar) 0xDFFF));
    assert( isChar(cast(dchar) 0xE000));

    // U+FFFE / U+FFFF and the upper end of the range
    assert( isChar(cast(dchar) 0xFFFD));
    assert(!isChar(cast(dchar) 0xFFFE));
    assert(!isChar(cast(dchar) 0xFFFF));
    assert( isChar(cast(dchar) 0x10000));
    assert( isChar(cast(dchar) 0x10FFFF));
    assert(!isChar(cast(dchar) 0x110000));

    debug (stdxml_TestHardcodedChecks)
    {
        // Cross-check the hard-coded ranges against the lookup table
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}

/**
 * Tests whether a character is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space, tab,
 * carriage return and linefeed.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four characters listed above
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020', '\u0009', '\u000A', '\u000D':
            return true;
        default:
            return false;
    }
}

/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true for '0' .. '9' (answered without a table lookup) and for
 *          any other character found in DigitTable
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // ASCII digits short-circuit the table search
    return (c >= 0x0030 && c <= 0x0039) || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    debug (stdxml_TestHardcodedChecks)
    {
        // The ASCII fast path must agree with the table
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}

/**
 * Tests whether a character is a letter according to the XML standard
 * (rule 84): either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}

/**
 * Tests whether a character is an ideographic character according to the
 * XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true for U+3007, U+3021 .. U+3029 and U+4E00 .. U+9FA5
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    assert(isIdeographic('\u4E00'));
    assert(isIdeographic('\u9FA5'));
    assert(isIdeographic('\u3007'));
    assert(isIdeographic('\u3021'));
    assert(isIdeographic('\u3029'));

    debug (stdxml_TestHardcodedChecks)
    {
        // Cross-check the hard-coded ranges against the lookup table
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}

/**
 * Tests whether a character is a base character according to the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(BaseCharTable, c);
}

/**
 * Tests whether a character is a combining character according to the
 * XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(CombiningCharTable, c);
}

/**
 * Tests whether a character is an extender according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return lookup(ExtenderTable, c);
}

/**
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the undead.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
333 * 334 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 335 * 336 * Params: 337 * s = The string to be encoded 338 * 339 * Returns: The encoded string 340 * 341 * Example: 342 * -------------- 343 * writefln(encode("a > b")); // writes "a > b" 344 * -------------- 345 */ 346 S encode(S)(S s) 347 { 348 import std.array : appender; 349 350 string r; 351 size_t lastI; 352 auto result = appender!S(); 353 354 foreach (i, c; s) 355 { 356 switch (c) 357 { 358 case '&': r = "&"; break; 359 case '"': r = """; break; 360 case '\'': r = "'"; break; 361 case '<': r = "<"; break; 362 case '>': r = ">"; break; 363 default: continue; 364 } 365 // Replace with r 366 result.put(s[lastI .. i]); 367 result.put(r); 368 lastI = i + 1; 369 } 370 371 if (!result.data.ptr) return s; 372 result.put(s[lastI .. $]); 373 return result.data; 374 } 375 376 @safe pure unittest 377 { 378 auto s = "hello"; 379 assert(encode(s) is s); 380 assert(encode("a > b") == "a > b", encode("a > b")); 381 assert(encode("a < b") == "a < b"); 382 assert(encode("don't") == "don't"); 383 assert(encode("\"hi\"") == ""hi"", encode("\"hi\"")); 384 assert(encode("cat & dog") == "cat & dog"); 385 } 386 387 /** 388 * Mode to use for decoding. 389 * 390 * $(DDOC_ENUM_MEMBERS NONE) Do not decode 391 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors 392 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error 393 */ 394 enum DecodeMode 395 { 396 NONE, LOOSE, STRICT 397 } 398 399 /** 400 * Decodes a string by unescaping all predefined XML entities. 401 * 402 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than 403 * and greater-than), and similarly, decode() unescapes them. These functions 404 * are provided for convenience only. You do not need to use them when using 405 * the undead.xml classes, because then all the encoding and decoding will be done 406 * for you automatically. 
407 * 408 * This function decodes the entities &amp;, &quot;, &apos;, 409 * &lt; and &gt, 410 * as well as decimal and hexadecimal entities such as &#x20AC; 411 * 412 * If the string does not contain an ampersand, the original will be returned. 413 * 414 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not 415 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT 416 * (decode, and throw a DecodeException in the event of an error). 417 * 418 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 419 * 420 * Params: 421 * s = The string to be decoded 422 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 423 * 424 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails 425 * 426 * Returns: The decoded string 427 * 428 * Example: 429 * -------------- 430 * writefln(decode("a > b")); // writes "a > b" 431 * -------------- 432 */ 433 string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure 434 { 435 import std.algorithm.searching : startsWith; 436 437 if (mode == DecodeMode.NONE) return s; 438 439 string buffer; 440 for (size_t i = 0; i < s.length; i++) 441 { 442 char c = s[i]; 443 if (c != '&') 444 { 445 if (buffer.length != 0) buffer ~= c; 446 } 447 else 448 { 449 if (buffer.length == 0) 450 { 451 buffer = s[0 .. i].dup; 452 } 453 if (startsWith(s[i..$],"&#")) 454 { 455 try 456 { 457 dchar d; 458 string t = s[i..$]; 459 checkCharRef(t, d); 460 char[4] temp; 461 import std.utf : encode; 462 buffer ~= temp[0 .. 
encode(temp, d)]; 463 i = s.length - t.length - 1; 464 } 465 catch (Err e) 466 { 467 if (mode == DecodeMode.STRICT) 468 throw new DecodeException("Unescaped &"); 469 buffer ~= '&'; 470 } 471 } 472 else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; } 473 else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; } 474 else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; } 475 else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; } 476 else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; } 477 else 478 { 479 if (mode == DecodeMode.STRICT) 480 throw new DecodeException("Unescaped &"); 481 buffer ~= '&'; 482 } 483 } 484 } 485 return (buffer.length == 0) ? s : buffer; 486 } 487 488 @safe pure unittest 489 { 490 void assertNot(string s) pure 491 { 492 bool b = false; 493 try { decode(s,DecodeMode.STRICT); } 494 catch (DecodeException e) { b = true; } 495 assert(b,s); 496 } 497 498 // Assert that things that should work, do 499 auto s = "hello"; 500 assert(decode(s, DecodeMode.STRICT) is s); 501 assert(decode("a > b", DecodeMode.STRICT) == "a > b"); 502 assert(decode("a < b", DecodeMode.STRICT) == "a < b"); 503 assert(decode("don't", DecodeMode.STRICT) == "don't"); 504 assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\""); 505 assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog"); 506 assert(decode("*", DecodeMode.STRICT) == "*"); 507 assert(decode("*", DecodeMode.STRICT) == "*"); 508 assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog"); 509 assert(decode("a > b", DecodeMode.LOOSE) == "a > b"); 510 assert(decode("&#;", DecodeMode.LOOSE) == "&#;"); 511 assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;"); 512 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 513 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 514 515 // Assert that things that shouldn't work, don't 516 assertNot("cat & dog"); 517 assertNot("a > b"); 518 assertNot("&#;"); 519 assertNot("&#x;"); 520 assertNot("G;"); 521 assertNot("G;"); 522 } 523 524 /** 525 * Class 
representing an XML document. 526 * 527 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 528 * 529 */ 530 class Document : Element 531 { 532 /** 533 * Contains all text which occurs before the root element. 534 * Defaults to <?xml version="1.0"?> 535 */ 536 string prolog = "<?xml version=\"1.0\"?>"; 537 /** 538 * Contains all text which occurs after the root element. 539 * Defaults to the empty string 540 */ 541 string epilog; 542 543 /** 544 * Constructs a Document by parsing XML text. 545 * 546 * This function creates a complete DOM (Document Object Model) tree. 547 * 548 * The input to this function MUST be valid XML. 549 * This is enforced by DocumentParser's in contract. 550 * 551 * Params: 552 * s = the complete XML text. 553 */ 554 this(string s) 555 in 556 { 557 assert(s.length != 0); 558 } 559 do 560 { 561 auto xml = new DocumentParser(s); 562 string tagString = xml.tag.tagString; 563 564 this(xml.tag); 565 prolog = s[0 .. tagString.ptr - s.ptr]; 566 parse(xml); 567 epilog = *xml.s; 568 } 569 570 /** 571 * Constructs a Document from a Tag. 572 * 573 * Params: 574 * tag = the start tag of the document. 575 */ 576 this(const(Tag) tag) 577 { 578 super(tag); 579 } 580 581 const 582 { 583 /** 584 * Compares two Documents for equality 585 * 586 * Example: 587 * -------------- 588 * Document d1,d2; 589 * if (d1 == d2) { } 590 * -------------- 591 */ 592 override bool opEquals(scope const Object o) const 593 { 594 const scope doc = toType!(const Document)(o); 595 return prolog == doc.prolog 596 && (cast(const) this).Element.opEquals(cast(const) doc) 597 && epilog == doc.epilog; 598 } 599 600 /** 601 * Compares two Documents 602 * 603 * You should rarely need to call this function. It exists so that 604 * Documents can be used as associative array keys. 
605 * 606 * Example: 607 * -------------- 608 * Document d1,d2; 609 * if (d1 < d2) { } 610 * -------------- 611 */ 612 override int opCmp(scope const Object o) scope const 613 { 614 const scope doc = toType!(const Document)(o); 615 if (prolog != doc.prolog) 616 return prolog < doc.prolog ? -1 : 1; 617 if (int cmp = this.Element.opCmp(doc)) 618 return cmp; 619 if (epilog != doc.epilog) 620 return epilog < doc.epilog ? -1 : 1; 621 return 0; 622 } 623 624 /** 625 * Returns the hash of a Document 626 * 627 * You should rarely need to call this function. It exists so that 628 * Documents can be used as associative array keys. 629 */ 630 override size_t toHash() scope const @trusted 631 { 632 return hash(prolog, hash(epilog, (cast() this).Element.toHash())); 633 } 634 635 /** 636 * Returns the string representation of a Document. (That is, the 637 * complete XML of a document). 638 */ 639 override string toString() scope const @safe 640 { 641 return prolog ~ super.toString() ~ epilog; 642 } 643 } 644 } 645 646 @system unittest 647 { 648 // https://issues.dlang.org/show_bug.cgi?id=14966 649 auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`; 650 651 auto a = new Document(xml); 652 auto b = new Document(xml); 653 assert(a == b); 654 assert(!(a < b)); 655 int[Document] aa; 656 aa[a] = 1; 657 assert(aa[b] == 1); 658 659 b ~= new Element("b"); 660 assert(a < b); 661 assert(b > a); 662 } 663 664 /** 665 * Class representing an XML element. 
666 * 667 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 668 */ 669 class Element : Item 670 { 671 Tag tag; /// The start tag of the element 672 Item[] items; /// The element's items 673 Text[] texts; /// The element's text items 674 CData[] cdatas; /// The element's CData items 675 Comment[] comments; /// The element's comments 676 ProcessingInstruction[] pis; /// The element's processing instructions 677 Element[] elements; /// The element's child elements 678 679 /** 680 * Constructs an Element given a name and a string to be used as a Text 681 * interior. 682 * 683 * Params: 684 * name = the name of the element. 685 * interior = (optional) the string interior. 686 * 687 * Example: 688 * ------------------------------------------------------- 689 * auto element = new Element("title","Serenity") 690 * // constructs the element <title>Serenity</title> 691 * ------------------------------------------------------- 692 */ 693 this(string name, string interior=null) @safe pure 694 { 695 this(new Tag(name)); 696 if (interior.length != 0) opOpAssign!("~")(new Text(interior)); 697 } 698 699 /** 700 * Constructs an Element from a Tag. 701 * 702 * Params: 703 * tag_ = the start or empty tag of the element. 704 */ 705 this(const(Tag) tag_) @safe pure 706 { 707 this.tag = new Tag(tag_.name); 708 tag.type = TagType.EMPTY; 709 foreach (k,v;tag_.attr) tag.attr[k] = v; 710 tag.tagString = tag_.tagString; 711 } 712 713 /** 714 * Append a text item to the interior of this element 715 * 716 * Params: 717 * item = the item you wish to append. 718 * 719 * Example: 720 * -------------- 721 * Element element; 722 * element ~= new Text("hello"); 723 * -------------- 724 */ 725 void opOpAssign(string op)(Text item) @safe pure 726 if (op == "~") 727 { 728 texts ~= item; 729 appendItem(item); 730 } 731 732 /** 733 * Append a CData item to the interior of this element 734 * 735 * Params: 736 * item = the item you wish to append. 
737 * 738 * Example: 739 * -------------- 740 * Element element; 741 * element ~= new CData("hello"); 742 * -------------- 743 */ 744 void opOpAssign(string op)(CData item) @safe pure 745 if (op == "~") 746 { 747 cdatas ~= item; 748 appendItem(item); 749 } 750 751 /** 752 * Append a comment to the interior of this element 753 * 754 * Params: 755 * item = the item you wish to append. 756 * 757 * Example: 758 * -------------- 759 * Element element; 760 * element ~= new Comment("hello"); 761 * -------------- 762 */ 763 void opOpAssign(string op)(Comment item) @safe pure 764 if (op == "~") 765 { 766 comments ~= item; 767 appendItem(item); 768 } 769 770 /** 771 * Append a processing instruction to the interior of this element 772 * 773 * Params: 774 * item = the item you wish to append. 775 * 776 * Example: 777 * -------------- 778 * Element element; 779 * element ~= new ProcessingInstruction("hello"); 780 * -------------- 781 */ 782 void opOpAssign(string op)(ProcessingInstruction item) @safe pure 783 if (op == "~") 784 { 785 pis ~= item; 786 appendItem(item); 787 } 788 789 /** 790 * Append a complete element to the interior of this element 791 * 792 * Params: 793 * item = the item you wish to append. 
794 * 795 * Example: 796 * -------------- 797 * Element element; 798 * Element other = new Element("br"); 799 * element ~= other; 800 * // appends element representing <br /> 801 * -------------- 802 */ 803 void opOpAssign(string op)(Element item) @safe pure 804 if (op == "~") 805 { 806 elements ~= item; 807 appendItem(item); 808 } 809 810 private void appendItem(Item item) @safe pure 811 { 812 items ~= item; 813 if (tag.type == TagType.EMPTY && !item.isEmptyXML) 814 tag.type = TagType.START; 815 } 816 817 private void parse(ElementParser xml) 818 { 819 xml.onText = (string s) { opOpAssign!("~")(new Text(s)); }; 820 xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); }; 821 xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); }; 822 xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); }; 823 824 xml.onStartTag[null] = (ElementParser xml) 825 { 826 auto e = new Element(xml.tag); 827 e.parse(xml); 828 opOpAssign!("~")(e); 829 }; 830 831 xml.parse(); 832 } 833 834 /** 835 * Compares two Elements for equality 836 * 837 * Example: 838 * -------------- 839 * Element e1,e2; 840 * if (e1 == e2) { } 841 * -------------- 842 */ 843 override bool opEquals(scope const Object o) const 844 { 845 const scope element = toType!(const Element)(o); 846 immutable len = items.length; 847 if (len != element.items.length) return false; 848 foreach (i; 0 .. len) 849 { 850 if (!items[i].opEquals(element.items[i])) return false; 851 } 852 return true; 853 } 854 855 /** 856 * Compares two Elements 857 * 858 * You should rarely need to call this function. It exists so that Elements 859 * can be used as associative array keys. 
860 * 861 * Example: 862 * -------------- 863 * Element e1,e2; 864 * if (e1 < e2) { } 865 * -------------- 866 */ 867 override int opCmp(scope const Object o) @safe const 868 { 869 const scope element = toType!(const Element)(o); 870 for (uint i=0; ; ++i) 871 { 872 if (i == items.length && i == element.items.length) return 0; 873 if (i == items.length) return -1; 874 if (i == element.items.length) return 1; 875 if (!items[i].opEquals(element.items[i])) 876 return items[i].opCmp(element.items[i]); 877 } 878 } 879 880 /** 881 * Returns the hash of an Element 882 * 883 * You should rarely need to call this function. It exists so that Elements 884 * can be used as associative array keys. 885 */ 886 override size_t toHash() scope const @safe 887 { 888 size_t hash = tag.toHash(); 889 foreach (item;items) hash += item.toHash(); 890 return hash; 891 } 892 893 const 894 { 895 /** 896 * Returns the decoded interior of an element. 897 * 898 * The element is assumed to contain text <i>only</i>. So, for 899 * example, given XML such as "<title>Good &amp; 900 * Bad</title>", will return "Good & Bad". 901 * 902 * Params: 903 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 904 * 905 * Throws: DecodeException if decode fails 906 */ 907 string text(DecodeMode mode=DecodeMode.LOOSE) 908 { 909 string buffer; 910 foreach (item;items) 911 { 912 Text t = cast(Text) item; 913 if (t is null) throw new DecodeException(item.toString()); 914 buffer ~= decode(t.toString(),mode); 915 } 916 return buffer; 917 } 918 919 /** 920 * Returns an indented string representation of this item 921 * 922 * Params: 923 * indent = (optional) number of spaces by which to indent this 924 * element. Defaults to 2. 
925 */ 926 override string[] pretty(uint indent=2) scope 927 { 928 import std.algorithm.searching : count; 929 import std.string : rightJustify; 930 931 if (isEmptyXML) return [ tag.toEmptyString() ]; 932 933 if (items.length == 1) 934 { 935 auto t = cast(const(Text))(items[0]); 936 if (t !is null) 937 { 938 return [tag.toStartString() ~ t.toString() ~ tag.toEndString()]; 939 } 940 } 941 942 string[] a = [ tag.toStartString() ]; 943 foreach (item;items) 944 { 945 string[] b = item.pretty(indent); 946 foreach (s;b) 947 { 948 a ~= rightJustify(s,count(s) + indent); 949 } 950 } 951 a ~= tag.toEndString(); 952 return a; 953 } 954 955 /** 956 * Returns the string representation of an Element 957 * 958 * Example: 959 * -------------- 960 * auto element = new Element("br"); 961 * writefln(element.toString()); // writes "<br />" 962 * -------------- 963 */ 964 override string toString() scope @safe 965 { 966 if (isEmptyXML) return tag.toEmptyString(); 967 968 string buffer = tag.toStartString(); 969 foreach (item;items) { buffer ~= item.toString(); } 970 buffer ~= tag.toEndString(); 971 return buffer; 972 } 973 974 override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; } 975 } 976 } 977 978 /** 979 * Tag types. 980 * 981 * $(DDOC_ENUM_MEMBERS START) Used for start tags 982 * $(DDOC_ENUM_MEMBERS END) Used for end tags 983 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags 984 * 985 */ 986 enum TagType { START, END, EMPTY } 987 988 /** 989 * Class representing an XML tag. 
990 * 991 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 992 * 993 * The class invariant guarantees 994 * <ul> 995 * <li> that $(B type) is a valid enum TagType value</li> 996 * <li> that $(B name) consists of valid characters</li> 997 * <li> that each attribute name consists of valid characters</li> 998 * </ul> 999 */ 1000 class Tag 1001 { 1002 TagType type = TagType.START; /// Type of tag 1003 string name; /// Tag name 1004 string[string] attr; /// Associative array of attributes 1005 private string tagString; 1006 1007 invariant() 1008 { 1009 string s; 1010 string t; 1011 1012 assert(type == TagType.START 1013 || type == TagType.END 1014 || type == TagType.EMPTY); 1015 1016 s = name; 1017 try { checkName(s,t); } 1018 catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); } 1019 1020 foreach (k,v;attr) 1021 { 1022 s = k; 1023 try { checkName(s,t); } 1024 catch (Err e) 1025 { assert(false,"Invalid attribute name:" ~ e.toString()); } 1026 } 1027 } 1028 1029 /** 1030 * Constructs an instance of Tag with a specified name and type 1031 * 1032 * The constructor does not initialize the attributes. To initialize the 1033 * attributes, you access the $(B attr) member variable. 1034 * 1035 * Params: 1036 * name = the Tag's name 1037 * type = (optional) the Tag's type. If omitted, defaults to 1038 * TagType.START. 1039 * 1040 * Example: 1041 * -------------- 1042 * auto tag = new Tag("img",Tag.EMPTY); 1043 * tag.attr["src"] = "http://example.com/example.jpg"; 1044 * -------------- 1045 */ 1046 this(string name, TagType type=TagType.START) @safe pure 1047 { 1048 this.name = name; 1049 this.type = type; 1050 } 1051 1052 /* Private constructor (so don't ddoc this!) 1053 * 1054 * Constructs a Tag by parsing the string representation, e.g. "<html>". 1055 * 1056 * The string is passed by reference, and is advanced over all characters 1057 * consumed. 
1058 * 1059 * The second parameter is a dummy parameter only, required solely to 1060 * distinguish this constructor from the public one. 1061 */ 1062 private this(ref string s, bool dummy) @safe pure 1063 { 1064 import std.algorithm.searching : countUntil; 1065 import std.ascii : isWhite; 1066 import std.utf : byCodeUnit; 1067 1068 tagString = s; 1069 try 1070 { 1071 reqc(s,'<'); 1072 if (optc(s,'/')) type = TagType.END; 1073 ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"); 1074 name = s[0 .. i]; 1075 s = s[i .. $]; 1076 1077 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1078 s = s[i .. $]; 1079 1080 while (s.length > 0 && s[0] != '>' && s[0] != '/') 1081 { 1082 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"); 1083 string key = s[0 .. i]; 1084 s = s[i .. $]; 1085 1086 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1087 s = s[i .. $]; 1088 reqc(s,'='); 1089 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1090 s = s[i .. $]; 1091 1092 immutable char quote = requireOneOf(s,"'\""); 1093 i = s.byCodeUnit.countUntil(quote); 1094 string val = decode(s[0 .. i], DecodeMode.LOOSE); 1095 s = s[i .. $]; 1096 reqc(s,quote); 1097 1098 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1099 s = s[i .. $]; 1100 attr[key] = val; 1101 } 1102 if (optc(s,'/')) 1103 { 1104 if (type == TagType.END) throw new TagException(""); 1105 type = TagType.EMPTY; 1106 } 1107 reqc(s,'>'); 1108 tagString.length = tagString.length - s.length; 1109 } 1110 catch (XMLException e) 1111 { 1112 tagString.length = tagString.length - s.length; 1113 throw new TagException(tagString); 1114 } 1115 } 1116 1117 const 1118 { 1119 /** 1120 * Compares two Tags for equality 1121 * 1122 * You should rarely need to call this function. It exists so that Tags 1123 * can be used as associative array keys. 
1124 * 1125 * Example: 1126 * -------------- 1127 * Tag tag1,tag2 1128 * if (tag1 == tag2) { } 1129 * -------------- 1130 */ 1131 override bool opEquals(scope Object o) 1132 { 1133 const tag = toType!(const Tag)(o); 1134 return 1135 (name != tag.name) ? false : ( 1136 (attr != tag.attr) ? false : ( 1137 (type != tag.type) ? false : ( 1138 true ))); 1139 } 1140 1141 /** 1142 * Compares two Tags 1143 * 1144 * Example: 1145 * -------------- 1146 * Tag tag1,tag2 1147 * if (tag1 < tag2) { } 1148 * -------------- 1149 */ 1150 override int opCmp(Object o) 1151 { 1152 const tag = toType!(const Tag)(o); 1153 // Note that attr is an AA, so the comparison is nonsensical (bug 10381) 1154 return 1155 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) : 1156 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) : 1157 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) : 1158 0 ))); 1159 } 1160 1161 /** 1162 * Returns the hash of a Tag 1163 * 1164 * You should rarely need to call this function. It exists so that Tags 1165 * can be used as associative array keys. 1166 */ 1167 override size_t toHash() 1168 { 1169 return .hashOf(name); 1170 } 1171 1172 /** 1173 * Returns the string representation of a Tag 1174 * 1175 * Example: 1176 * -------------- 1177 * auto tag = new Tag("book",TagType.START); 1178 * writefln(tag.toString()); // writes "<book>" 1179 * -------------- 1180 */ 1181 override string toString() @safe 1182 { 1183 if (isEmpty) return toEmptyString(); 1184 return (isEnd) ? 
toEndString() : toStartString(); 1185 } 1186 1187 private 1188 { 1189 string toNonEndString() @safe 1190 { 1191 import std.format : format; 1192 1193 string s = "<" ~ name; 1194 foreach (key,val;attr) 1195 s ~= format(" %s=\"%s\"",key,encode(val)); 1196 return s; 1197 } 1198 1199 string toStartString() @safe { return toNonEndString() ~ ">"; } 1200 1201 string toEndString() @safe { return "</" ~ name ~ ">"; } 1202 1203 string toEmptyString() @safe { return toNonEndString() ~ " />"; } 1204 } 1205 1206 /** 1207 * Returns true if the Tag is a start tag 1208 * 1209 * Example: 1210 * -------------- 1211 * if (tag.isStart) { } 1212 * -------------- 1213 */ 1214 @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; } 1215 1216 /** 1217 * Returns true if the Tag is an end tag 1218 * 1219 * Example: 1220 * -------------- 1221 * if (tag.isEnd) { } 1222 * -------------- 1223 */ 1224 @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; } 1225 1226 /** 1227 * Returns true if the Tag is an empty tag 1228 * 1229 * Example: 1230 * -------------- 1231 * if (tag.isEmpty) { } 1232 * -------------- 1233 */ 1234 @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; } 1235 } 1236 } 1237 1238 /** 1239 * Class representing a comment 1240 */ 1241 class Comment : Item 1242 { 1243 private string content; 1244 1245 /** 1246 * Construct a comment 1247 * 1248 * Params: 1249 * content = the body of the comment 1250 * 1251 * Throws: CommentException if the comment body is illegal (contains "--" 1252 * or exactly equals "-") 1253 * 1254 * Example: 1255 * -------------- 1256 * auto item = new Comment("This is a comment"); 1257 * // constructs <!--This is a comment--> 1258 * -------------- 1259 */ 1260 this(string content) @safe pure 1261 { 1262 import std.string : indexOf; 1263 1264 if (content == "-" || content.indexOf("--") != -1) 1265 throw new CommentException(content); 1266 this.content = content; 1267 } 1268 1269 
/**
     * Compares two comments for equality
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: true if o is a Comment whose body equals this one's
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        // toType is defined elsewhere in this module; presumably it rejects
        // objects that are not Items -- confirm against its definition.
        const scope item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: a negative value, zero or a positive value when this comment's
     * body sorts before, equal to or after the other comment's body. Zero is
     * also returned when o is an Item that is not a Comment.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // The previous single expression `t !is null && (...)` was evaluated
        // as a bool, so it could only ever yield 0 or 1 and "less than" was
        // never reported. Branch explicitly so that -1 survives.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow
    {
        return hash(content);
    }

    /**
     * Returns a string representation of this comment
     *
     * The body is wrapped in the XML comment delimiters.
     */
    override string toString() scope const @safe pure nothrow
    {
        return "<!--" ~ content ~ "-->";
    }

    /// Returns false always
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const
    {
        return false;
    }
}

@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged ...
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // ... while a body containing "--" is rejected.
    assertThrown!CommentException(new Comment("--"));
}

/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        // The section terminator may not appear inside the body.
        immutable terminatorAt = content.indexOf("]]>");
        if (terminatorAt != -1)
            throw new CDataException(content);

        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: true if o is a CData whose body equals this one's
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const other = cast(const CData) item;
        if (other is null)
            return false;
        return content == other.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
*
     * Returns: a negative value, zero or a positive value when this section's
     * body sorts before, equal to or after the other section's body. Zero is
     * also returned when o is an Item that is not a CData.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // The previous single expression `t !is null && (...)` was evaluated
        // as a bool, so it could only ever yield 0 or 1 and "less than" was
        // never reported. Branch explicitly so that -1 survives.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     *
     * The body is placed between the module-level `cdata` opener and "]]>".
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    /// Returns false always
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; }
}

/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text.
* This function encodes the text before
     *                insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        // Stored in encoded form; toString() returns it unchanged.
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: true if o is a Text whose (encoded) content equals this one's
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: a negative value, zero or a positive value when this section's
     * content sorts before, equal to or after the other section's content.
     * Zero is also returned when o is an Item that is not a Text.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // The previous single expression `t !is null && (...)` was evaluated
        // as a bool, so it could only ever yield 0 or 1 and "less than" was
        // never reported. Branch explicitly so that -1 survives.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow
    {
        return hash(content);
    }

    /**
     * Returns a string representation of this Text section
     *
     * This is the stored content exactly as held by the object.
     */
    override string toString() scope const @safe @nogc pure nothrow
    {
        return content;
    }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const
    {
        return content.length == 0;
    }
}

/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        // A ">" inside the body would end the instruction early.
        immutable closerAt = content.indexOf(">");
        if (closerAt != -1)
            throw new XIException(content);

        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: true if o is an XMLInstruction whose body equals this one's
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const scope item = toType!(const Item)(o);
        const other = cast(const XMLInstruction) item;
        if (other is null)
            return false;
        return content == other.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
*
     * Returns: a negative value, zero or a positive value when this
     * instruction's body sorts before, equal to or after the other one's body.
     * Zero is also returned when o is an Item that is not an XMLInstruction.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // The previous single expression `t !is null && (...)` was evaluated
        // as a bool, so it could only ever yield 0 or 1 and "less than" was
        // never reported. Branch explicitly so that -1 survives.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     *
     * The body is wrapped as "<!" ... ">".
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    /// Returns false always
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; }
}

/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: true if o is a ProcessingInstruction whose body equals this
     * one's
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
const scope item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Params:
     *      o = the object to compare with
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's body sorts before, equal to or after the other one's body.
     * Zero is also returned when o is an Item that is not a
     * ProcessingInstruction.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const scope item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // The previous single expression `t !is null && (...)` was evaluated
        // as a bool, so it could only ever yield 0 or 1 and "less than" was
        // never reported. Branch explicitly so that -1 survives.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?"
~ content ~ "?>"; }

    /// Returns false always
    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const
    {
        return false;
    }
}

/**
 * Abstract base class for XML items
 *
 * Every concrete item supplies equality, ordering, hashing, a string form and
 * an "is this empty XML text" query.
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation returns the stripped toString() text as a
     * single line, or no lines at all when that text is empty. It does not
     * read `indent`.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string line = strip(toString());
        if (line.length == 0)
            return [];
        return [ line ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}

/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
*
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);

        // Run the well-formedness checker over the input; a failure is turned
        // into an assertion whose message carries the checker's explanation.
        try
        {
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        // The base-class state reads through `s`, so it has to point at our
        // copy of the text before super() runs.
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}

@system unittest
{
    auto document = new Document("<root><child><grandchild/></child></root>");
    assert(document.elements.length == 1);
    assert(document.elements[0].tag.name == "child");
    assert(document.items == document.elements);
}

/**
 * Class for parsing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Note that you cannot construct instances of this class directly. You can
 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, Instances of ElementParser will be created for you by the
 * library, and passed your way via onStartTag handlers.
*
 */
class ElementParser
{
    /// Callback receiving the text of a comment, CDATA, instruction or text run
    alias Handler = void delegate(string);
    /// Callback receiving the element that has just been closed
    alias ElementHandler = void delegate(in Element element);
    /// Callback receiving a parser positioned inside a newly opened element
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;               // tag of the element this parser works on
        string elementStart;    // the input as it stood when this parser was created
        string* s;              // shared cursor into the not-yet-consumed input

        // One optional callback per kind of item; null means "not registered".
        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags: shares the parent's cursor and
        // adopts the parent's current tag.
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags: works on the caller-supplied
        // text `t` and the given tag.
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            s = t;
            this();
            tag_ = tag;
        }
    }

    /**
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const
    {
        return tag_;
    }

    /**
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
*
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
     * and &lt;br/&gt;.
     */
    ParserHandler[string] onStartTag;

    /**
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
*
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
     * and &lt;br/&gt;.
     */
    ElementHandler[string] onEndTag;

    // Remembers the input as it stands right now; `s` must already be set by
    // the calling constructor.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }

    /**
     * Register a handler which will be called whenever text is encountered.
     *
     * If a handler has also been registered with onTextRaw, parse() calls
     * that one instead of this one.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler)
    {
        textHandler = handler;
    }

    /**
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate.
* Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    // Marked @property like every other handler-registration setter in this
    // class; the call syntax onTextRaw(handler) keeps working.
    @property @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /**
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /**
     * Register a handler which will be called whenever a comment is
     * encountered.
*
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler)
    {
        commentHandler = handler;
    }

    /**
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler)
    {
        piHandler = handler;
    }

    /**
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler)
    {
        xiHandler = handler;
    }

    /**
     * Parse an XML element.
*
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * Start and end handlers are looked up by tag name first and, failing
     * that, under the null key. For text, a handler registered with onTextRaw
     * takes precedence over one registered with onText.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        // Tag this parser was created for. It is null while a derived class's
        // constructor is still looking for the first tag.
        const Tag root = tag_;

        // Most recently seen start tag for each element name.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        // Consumes one delimited item (comment, CDATA, <!...>, <?...?>):
        // drops the opener, hands the inner text to `handler` if one is
        // registered, then drops the closer.
        void consumeDelimited(size_t openerLength, string closer, Handler handler)
        {
            chop(*s, openerLength);
            string inner = chop(*s, indexOf(*s, closer));
            if (handler.funcptr !is null) handler(inner);
            chop(*s, closer.length);
        }

        // Calls the start-tag handler registered for `name`, else the one
        // registered under null, else nothing.
        void notifyStart(string name, ElementParser child)
        {
            auto handler = name in onStartTag;
            if (handler is null) handler = null in onStartTag;
            if (handler !is null) (*handler)(child);
        }

        // Calls the end-tag handler registered for `name`, else the one
        // registered under null, else nothing.
        void notifyEnd(string name, Element element)
        {
            auto handler = name in onEndTag;
            if (handler is null) handler = null in onEndTag;
            if (handler !is null) (*handler)(element);
        }

        while (s.length != 0)
        {
            if (startsWith(*s, "<!--"))
            {
                consumeDelimited(4, "-->", commentHandler);
            }
            else if (startsWith(*s, "<![CDATA["))
            {
                consumeDelimited(9, "]]>", cdataHandler);
            }
            else if (startsWith(*s, "<!"))
            {
                consumeDelimited(2, ">", xiHandler);
            }
            else if (startsWith(*s, "<?"))
            {
                consumeDelimited(2, "?>", piHandler);
            }
            else if (startsWith(*s, "<"))
            {
                tag_ = new Tag(*s, true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto child = new ElementParser(this);
                    notifyStart(tag_.name, child);
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element's text is whatever lies in the input
                    // between the end of the start tag and the beginning of
                    // this end tag.
                    immutable(char)* textBegin = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* textEnd = &tag_.tagString[0];
                    string text = decode(textBegin[0 .. (textEnd - textBegin)],
                        DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    notifyEnd(tag_.name, element);

                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy, for bug 2979
                    // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                        foreach (tn, tv; tag_.attr) startTag.attr[tn] = tv;
                    // END FIX

                    // Handle the pretend start tag; its parser gets an empty
                    // text of its own to work on.
                    string s2;
                    auto parser = new ElementParser(startTag, &s2);
                    notifyStart(startTag.name, parser);

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    notifyEnd(tag_.name, element);
                }
            }
            else
            {
                // Plain text up to the next markup
                string run = chop(*s, indexOf(*s, "<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(run);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(run, DecodeMode.LOOSE));
            }
        }
    }

    /**
     * Returns that part of the element which has already been parsed
     */
    override string toString() const @nogc @safe pure nothrow
    {
        assert(elementStart.length >= s.length);
        return elementStart[0 ..
elementStart.length - s.length];
    }

}

private
{
    // Mixed into every check* function. It records the input position on
    // entry (`old`) and supplies fail() overloads which rewind `s` to that
    // position and throw an Err labelled with `msg`.
    template Check(string msg)
    {
        string old = s;

        // Fails with just this rule's label.
        void fail() @safe pure
        {
            s = old;
            throw new Err(s,msg);
        }

        // Fails with this rule's label, passing on the error `e` raised by a
        // nested rule.
        void fail(Err e) @safe pure
        {
            s = old;
            throw new Err(s,msg,e);
        }

        // Fails with this rule's label plus a detail message; the detail is
        // attached to the position reached so far, before rewinding.
        void fail(string msg2) @safe pure
        {
            auto detail = new Err(s,msg2);
            fail(detail);
        }
    }

    // Checks one Misc item: a comment, a processing instruction or whitespace.
    void checkMisc(ref string s) @safe pure // rule 27
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Misc");

        try
        {
            if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else
                checkSpace(s);
        }
        catch (Err e) { fail(e); }
    }

    // Checks a whole document: prolog, one element, then any number of Misc
    // items.
    void checkDocument(ref string s) @safe pure // rule 1
    {
        mixin Check!("Document");

        try
        {
            checkProlog(s);
            checkElement(s);
            star!(checkMisc)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Checks that every character of s satisfies isChar.
    void checkChars(ref string s) @safe pure // rule 2
    {
        // TO DO - Fix std.utf stride and decode functions, then use those
        // instead
        import std.format : format;

        mixin Check!("Chars");

        dchar c;
        ptrdiff_t n = -1;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
foreach (size_t i, dchar d; s)
        {
            if (!isChar(d))
            {
                c = d;
                n = i;
                break;
            }
        }
        if (n != -1)
        {
            // Point the error at the offending character
            s = s[n..$];
            fail(format("invalid character: U+%04X",c));
        }
    }

    // Consumes a run of leading whitespace; fails if there is none.
    void checkSpace(ref string s) @safe pure // rule 3
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        mixin Check!("Whitespace");
        ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
        if (i == -1 && s.length > 0 && isWhite(s[0]))
            s = s[$ .. $];      // nothing but whitespace left
        else if (i > -1)
            s = s[i .. $];
        if (s is old) fail();   // nothing was consumed
    }

    // Consumes a Name from the front of s and returns it in `name`.
    // Fails if s is empty or does not start with a letter, '_' or ':'.
    void checkName(ref string s, out string name) @safe pure // rule 5
    {
        mixin Check!("Name");

        if (s.length == 0) fail();

        // End of the name. Defaults to the whole input so that a name running
        // to the very end of s is returned in full; it used to default to 0,
        // which returned an empty name and consumed nothing in that case.
        ptrdiff_t n = s.length;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
        foreach (size_t i, dchar c; s)
        {
            if (c == '_' || c == ':' || isLetter(c)) continue;
            if (i == 0) fail();
            if (c == '-' || c == '.' || isDigit(c)
                || isCombiningChar(c) || isExtender(c)) continue;
            n = i;
            break;
        }
        name = s[0 .. n];
        s = s[n..$];
    }

    // Consumes a quoted attribute value, including both quotes.
    // Fails if the value is not quoted, is not terminated, contains '<', or
    // contains an '&' that checkReference does not accept.
    void checkAttValue(ref string s) @safe pure // rule 10
    {
        import std.algorithm.searching : countUntil;
        import std.utf : byCodeUnit;

        mixin Check!("AttValue");

        if (s.length == 0) fail();
        char c = s[0];
        if (c != '\u0022' && c != '\u0027')
            fail("attribute value requires quotes");
        s = s[1..$];
        for (;;)
        {
            // Skip ordinary characters up to the next one that matters: the
            // closing quote, '<' or '&'. Scanning for the quote alone made
            // the two checks below unreachable, and a missing quote produced
            // an out-of-range slice instead of the error message.
            immutable stop = s.byCodeUnit
                .countUntil!(ch => ch == c || ch == '<' || ch == '&');
            s = stop == -1 ? s[$ .. $] : s[stop .. $];
            if (s.length == 0) fail("unterminated attribute value");
            if (s[0] == '<') fail("< found in attribute value");
            if (s[0] == c) break;
            try { checkReference(s); } catch (Err e) { fail(e); }
        }
        s = s[1..$];
    }

    // Consumes character data up to (not including) the next '&' or '<'.
    // Fails if "]]>" is met on the way.
    void checkCharData(ref string s) @safe pure // rule 14
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("CharData");

        while (s.length != 0)
        {
            if (s.startsWith("&")) break;
            if (s.startsWith("<")) break;
            if (s.startsWith("]]>")) fail("]]> found within char data");
            s = s[1..$];
        }
    }

    // Consumes a comment. The first "--" after the opener must be the start
    // of the closing "-->".
    void checkComment(ref string s) @safe pure // rule 15
    {
        import std.string : indexOf;

        mixin Check!("Comment");

        try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
        ptrdiff_t n = s.indexOf("--");
        if (n == -1) fail("unterminated comment");
        s = s[n..$];
        try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
    }

    // Consumes a processing instruction: "<?" then checkEnd("?>", s).
    void checkPI(ref string s) @safe pure // rule 16
    {
        mixin Check!("PI");

        try
        {
            checkLiteral("<?",s);
            checkEnd("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a CDATA section: the `cdata` opener then checkEnd("]]>", s).
    void checkCDSect(ref string s) @safe pure // rule 18
    {
        mixin Check!("CDSect");

        try
        {
            checkLiteral(cdata,s);
            checkEnd("]]>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the prolog: optional XML declaration, Misc items, and an
    // optional DOCTYPE declaration followed by more Misc items.
    void checkProlog(ref string s) @safe pure // rule 22
    {
        mixin Check!("Prolog");

        try
        {
            /* The XML declaration is optional
             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
             */
            opt!(checkXMLDecl)(s);

            star!(checkMisc)(s);
            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the XML declaration "<?xml ... ?>".
    void checkXMLDecl(ref string s) @safe pure // rule 23
    {
        mixin Check!("XMLDecl");

        try
        {
            checkLiteral("<?xml",s);
            checkVersionInfo(s);
opt!(checkEncodingDecl)(s);
            opt!(checkSDDecl)(s);
            opt!(checkSpace)(s);
            checkLiteral("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the version attribute of the XML declaration:
    // whitespace, "version", '=', then a quoted version number.
    void checkVersionInfo(ref string s) @safe pure // rule 24
    {
        mixin Check!("VersionInfo");

        try
        {
            checkSpace(s);
            checkLiteral("version",s);
            checkEq(s);
            quoted!(checkVersionNum)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes '=' with optional whitespace on either side.
    void checkEq(ref string s) @safe pure // rule 25
    {
        mixin Check!("Eq");

        try
        {
            opt!(checkSpace)(s);
            checkLiteral("=",s);
            opt!(checkSpace)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a version number: one or more of [a-zA-Z0-9_.:-].
    // Fails if no such character is present.
    //
    // This used to skip ahead to the next '"' regardless of content. When the
    // input held no '"' at all, the slice bound became -1 and the function
    // failed with a range violation instead of a check error.
    // NOTE(review): this is called through quoted!(...), which presumably
    // also accepts single quotes -- confirm against its definition.
    void checkVersionNum(ref string s) @safe pure // rule 26
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isAlphaNum;
        import std.utf : byCodeUnit;

        mixin Check!("VersionNum");

        static bool isVersionChar(char ch) @safe @nogc pure nothrow
        {
            return isAlphaNum(ch) || ch == '_' || ch == '.' || ch == ':' || ch == '-';
        }

        immutable stop = s.byCodeUnit.countUntil!(ch => !isVersionChar(ch));
        s = stop == -1 ? s[$ .. $] : s[stop .. $];
        if (s is old) fail();   // nothing was consumed
    }

    // Consumes a DOCTYPE declaration without looking inside it.
    void checkDocTypeDecl(ref string s) @safe pure // rule 28
    {
        mixin Check!("DocTypeDecl");

        try
        {
            checkLiteral("<!DOCTYPE",s);
            //
            // TO DO -- ensure DOCTYPE is well formed
            // (But not yet.
// That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the standalone attribute of the XML declaration:
    // whitespace, "standalone", '=', then 'yes' or 'no' in either kind of
    // quotes.
    void checkSDDecl(ref string s) @safe pure // rule 32
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch (Err e) { fail(e); }

        // Length of the quoted value that was recognised
        int valueLength = 0;
        if (s.startsWith("'yes'") || s.startsWith("\"yes\""))
            valueLength = 5;
        else if (s.startsWith("'no'") || s.startsWith("\"no\""))
            valueLength = 4;
        else
            fail("standalone attribute value must be 'yes', \"yes\","~
                " 'no' or \"no\"");
        s = s[valueLength..$];
    }

    // Consumes one element: either an empty-element tag, or a start tag, its
    // content and an end tag whose name matches the start tag.
    void checkElement(ref string s) @safe pure // rule 39
    {
        mixin Check!("Element");

        string startName, endName, kind;
        try { checkTag(s,kind,startName); } catch (Err e) { fail(e); }

        // Anything other than "STag" was an empty-element tag: nothing more
        // to consume.
        if (kind != "STag")
            return;

        string endTagAt;
        try
        {
            checkContent(s);
            endTagAt = s;
            checkETag(s,endName);
        }
        catch (Err e) { fail(e); }

        if (startName != endName)
        {
            s = endTagAt;
            fail("end tag name \"" ~ endName
                ~ "\" differs from start tag name \""~startName~"\"");
        }
    }

    // rules 40 and 44
    // Consumes a start tag or an empty-element tag and returns its name.
    // `type` is set to "STag" for a start tag and to "ETag" when the tag is
    // closed with "/>" (an empty-element tag, not an end tag).
    void checkTag(ref string s, out string type, out string name) @safe pure
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);

            immutable selfClosing = s.length != 0 && s[0] == '/';
            if (selfClosing)
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes one attribute: a name, '=', and a quoted value.
    void checkAttribute(ref string s) @safe pure // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string attributeName;
            checkName(s,attributeName);
            checkEq(s);
            checkAttValue(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes an end tag and returns its name.
    void checkETag(ref string s, out string name) @safe pure // rule 42
    {
        mixin
Check!("ETag"); 2513 2514 try 2515 { 2516 checkLiteral("</",s); 2517 checkName(s,name); 2518 opt!(checkSpace)(s); 2519 checkLiteral(">",s); 2520 } 2521 catch (Err e) { fail(e); } 2522 } 2523 2524 void checkContent(ref string s) @safe pure // rule 43 2525 { 2526 import std.algorithm.searching : startsWith; 2527 2528 mixin Check!("Content"); 2529 2530 try 2531 { 2532 while (s.length != 0) 2533 { 2534 old = s; 2535 if (s.startsWith("&")) { checkReference(s); } 2536 else if (s.startsWith("<!--")) { checkComment(s); } 2537 else if (s.startsWith("<?")) { checkPI(s); } 2538 else if (s.startsWith(cdata)) { checkCDSect(s); } 2539 else if (s.startsWith("</")) { break; } 2540 else if (s.startsWith("<")) { checkElement(s); } 2541 else { checkCharData(s); } 2542 } 2543 } 2544 catch (Err e) { fail(e); } 2545 } 2546 2547 void checkCharRef(ref string s, out dchar c) @safe pure // rule 66 2548 { 2549 import std.format : format; 2550 2551 mixin Check!("CharRef"); 2552 2553 c = 0; 2554 try { checkLiteral("&#",s); } catch (Err e) { fail(e); } 2555 int radix = 10; 2556 if (s.length != 0 && s[0] == 'x') 2557 { 2558 s = s[1..$]; 2559 radix = 16; 2560 } 2561 if (s.length == 0) fail("unterminated character reference"); 2562 if (s[0] == ';') 2563 fail("character reference must have at least one digit"); 2564 while (s.length != 0) 2565 { 2566 immutable char d = s[0]; 2567 int n = 0; 2568 switch (d) 2569 { 2570 case 'F','f': ++n; goto case; 2571 case 'E','e': ++n; goto case; 2572 case 'D','d': ++n; goto case; 2573 case 'C','c': ++n; goto case; 2574 case 'B','b': ++n; goto case; 2575 case 'A','a': ++n; goto case; 2576 case '9': ++n; goto case; 2577 case '8': ++n; goto case; 2578 case '7': ++n; goto case; 2579 case '6': ++n; goto case; 2580 case '5': ++n; goto case; 2581 case '4': ++n; goto case; 2582 case '3': ++n; goto case; 2583 case '2': ++n; goto case; 2584 case '1': ++n; goto case; 2585 case '0': break; 2586 default: n = 100; break; 2587 } 2588 if (n >= radix) break; 2589 c *= radix; 2590 
c += n; 2591 s = s[1..$]; 2592 } 2593 if (!isChar(c)) fail(format("U+%04X is not a legal character",c)); 2594 if (s.length == 0 || s[0] != ';') fail("expected ;"); 2595 else s = s[1..$]; 2596 } 2597 2598 void checkReference(ref string s) @safe pure // rule 67 2599 { 2600 import std.algorithm.searching : startsWith; 2601 2602 mixin Check!("Reference"); 2603 2604 try 2605 { 2606 dchar c; 2607 if (s.startsWith("&#")) checkCharRef(s,c); 2608 else checkEntityRef(s); 2609 } 2610 catch (Err e) { fail(e); } 2611 } 2612 2613 void checkEntityRef(ref string s) @safe pure // rule 68 2614 { 2615 mixin Check!("EntityRef"); 2616 2617 try 2618 { 2619 string name; 2620 checkLiteral("&",s); 2621 checkName(s,name); 2622 checkLiteral(";",s); 2623 } 2624 catch (Err e) { fail(e); } 2625 } 2626 2627 void checkEncName(ref string s) @safe pure // rule 81 2628 { 2629 import std.algorithm.searching : countUntil; 2630 import std.ascii : isAlpha; 2631 import std.utf : byCodeUnit; 2632 2633 mixin Check!("EncName"); 2634 2635 s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $]; 2636 if (s is old) fail(); 2637 s = s[s.byCodeUnit.countUntil('\"', '\'') .. $]; 2638 } 2639 2640 void checkEncodingDecl(ref string s) @safe pure // rule 80 2641 { 2642 mixin Check!("EncodingDecl"); 2643 2644 try 2645 { 2646 checkSpace(s); 2647 checkLiteral("encoding",s); 2648 checkEq(s); 2649 quoted!(checkEncName)(s); 2650 } 2651 catch (Err e) { fail(e); } 2652 } 2653 2654 // Helper functions 2655 2656 void checkLiteral(string literal,ref string s) @safe pure 2657 { 2658 import std.string : startsWith; 2659 2660 mixin Check!("Literal"); 2661 2662 if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\""); 2663 s = s[literal.length..$]; 2664 } 2665 2666 void checkEnd(string end,ref string s) @safe pure 2667 { 2668 import std.string : indexOf; 2669 // Deliberately no mixin Check here. 
2670 2671 auto n = s.indexOf(end); 2672 if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\""); 2673 s = s[n..$]; 2674 checkLiteral(end,s); 2675 } 2676 2677 // Metafunctions -- none of these use mixin Check 2678 2679 void opt(alias f)(ref string s) 2680 { 2681 try { f(s); } catch (Err e) {} 2682 } 2683 2684 void plus(alias f)(ref string s) 2685 { 2686 f(s); 2687 star!(f)(s); 2688 } 2689 2690 void star(alias f)(ref string s) 2691 { 2692 while (s.length != 0) 2693 { 2694 try { f(s); } 2695 catch (Err e) { return; } 2696 } 2697 } 2698 2699 void quoted(alias f)(ref string s) 2700 { 2701 import std.string : startsWith; 2702 2703 if (s.startsWith("'")) 2704 { 2705 checkLiteral("'",s); 2706 f(s); 2707 checkLiteral("'",s); 2708 } 2709 else 2710 { 2711 checkLiteral("\"",s); 2712 f(s); 2713 checkLiteral("\"",s); 2714 } 2715 } 2716 2717 void seq(alias f,alias g)(ref string s) 2718 { 2719 f(s); 2720 g(s); 2721 } 2722 } 2723 2724 /** 2725 * Check an entire XML document for well-formedness 2726 * 2727 * Params: 2728 * s = the document to be checked, passed as a string 2729 * 2730 * Throws: CheckException if the document is not well formed 2731 * 2732 * CheckException's toString() method will yield the complete hierarchy of 2733 * parse failure (the XML equivalent of a stack trace), giving the line and 2734 * column number of every failure at every level. 
2735 */ 2736 void check(string s) @safe pure 2737 { 2738 try 2739 { 2740 checkChars(s); 2741 checkDocument(s); 2742 if (s.length != 0) throw new Err(s,"Junk found after document"); 2743 } 2744 catch (Err e) 2745 { 2746 e.complete(s); 2747 throw e; 2748 } 2749 } 2750 2751 @system pure unittest 2752 { 2753 import std.string : indexOf; 2754 2755 try 2756 { 2757 check(q"[<?xml version="1.0"?> 2758 <catalog> 2759 <book id="bk101"> 2760 <author>Gambardella, Matthew</author> 2761 <title>XML Developer's Guide</title> 2762 <genre>Computer</genre> 2763 <price>44.95</price> 2764 <publish_date>2000-10-01</publish_date> 2765 <description>An in-depth look at creating applications 2766 with XML.</description> 2767 </book> 2768 <book id="bk102"> 2769 <author>Ralls, Kim</author> 2770 <title>Midnight Rain</title> 2771 <genre>Fantasy</genres> 2772 <price>5.95</price> 2773 <publish_date>2000-12-16</publish_date> 2774 <description>A former architect battles corporate zombies, 2775 an evil sorceress, and her own childhood to become queen 2776 of the world.</description> 2777 </book> 2778 <book id="bk103"> 2779 <author>Corets, Eva</author> 2780 <title>Maeve Ascendant</title> 2781 <genre>Fantasy</genre> 2782 <price>5.95</price> 2783 <publish_date>2000-11-17</publish_date> 2784 <description>After the collapse of a nanotechnology 2785 society in England, the young survivors lay the 2786 foundation for a new society.</description> 2787 </book> 2788 </catalog> 2789 ]"); 2790 assert(false); 2791 } 2792 catch (CheckException e) 2793 { 2794 auto n = e.toString().indexOf("end tag name \"genres\" differs"~ 2795 " from start tag name \"genre\""); 2796 assert(n != -1); 2797 } 2798 } 2799 2800 @system unittest 2801 { 2802 string s = q"EOS 2803 <?xml version="1.0"?> 2804 <set> 2805 <one>A</one> 2806 <!-- comment --> 2807 <two>B</two> 2808 </set> 2809 EOS"; 2810 try 2811 { 2812 check(s); 2813 } 2814 catch (CheckException e) 2815 { 2816 assert(0, e.toString()); 2817 } 2818 } 2819 2820 @system unittest 
2821 { 2822 string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream 2823 xmlns:stream="http://etherx.'jabber'.org/streams" 2824 xmlns="jabber:'client'" from='jid.pl' id="587a5767" 2825 xml:lang="en" version="1.0" attr='a"b"c'> 2826 </stream:stream></r>`; 2827 2828 DocumentParser parser = new DocumentParser(test_xml); 2829 bool tested = false; 2830 parser.onStartTag["stream:stream"] = (ElementParser p) { 2831 assert(p.tag.attr["xmlns"] == "jabber:'client'"); 2832 assert(p.tag.attr["from"] == "jid.pl"); 2833 assert(p.tag.attr["attr"] == "a\"b\"c"); 2834 tested = true; 2835 }; 2836 parser.parse(); 2837 assert(tested); 2838 } 2839 2840 @system unittest 2841 { 2842 string s = q"EOS 2843 <?xml version="1.0" encoding="utf-8"?> <Tests> 2844 <Test thing="What & Up">What & Up Second</Test> 2845 </Tests> 2846 EOS"; 2847 auto xml = new DocumentParser(s); 2848 2849 xml.onStartTag["Test"] = (ElementParser xml) { 2850 assert(xml.tag.attr["thing"] == "What & Up"); 2851 }; 2852 2853 xml.onEndTag["Test"] = (in Element e) { 2854 assert(e.text() == "What & Up Second"); 2855 }; 2856 xml.parse(); 2857 } 2858 2859 @system unittest 2860 { 2861 string s = `<tag attr=""value>" />`; 2862 auto doc = new Document(s); 2863 assert(doc.toString() == s); 2864 } 2865 2866 /** The base class for exceptions thrown by this module */ 2867 class XMLException : Exception { this(string msg) @safe pure { super(msg); } } 2868 2869 // Other exceptions 2870 2871 /// Thrown during Comment constructor 2872 class CommentException : XMLException 2873 { private this(string msg) @safe pure { super(msg); } } 2874 2875 /// Thrown during CData constructor 2876 class CDataException : XMLException 2877 { private this(string msg) @safe pure { super(msg); } } 2878 2879 /// Thrown during XMLInstruction constructor 2880 class XIException : XMLException 2881 { private this(string msg) @safe pure { super(msg); } } 2882 2883 /// Thrown during ProcessingInstruction constructor 2884 class PIException : 
XMLException 2885 { private this(string msg) @safe pure { super(msg); } } 2886 2887 /// Thrown during Text constructor 2888 class TextException : XMLException 2889 { private this(string msg) @safe pure { super(msg); } } 2890 2891 /// Thrown during decode() 2892 class DecodeException : XMLException 2893 { private this(string msg) @safe pure { super(msg); } } 2894 2895 /// Thrown if comparing with wrong type 2896 class InvalidTypeException : XMLException 2897 { private this(string msg) @safe pure { super(msg); } } 2898 2899 /// Thrown when parsing for Tags 2900 class TagException : XMLException 2901 { private this(string msg) @safe pure { super(msg); } } 2902 2903 /** 2904 * Thrown during check() 2905 */ 2906 class CheckException : XMLException 2907 { 2908 CheckException err; /// Parent in hierarchy 2909 private string tail; 2910 /** 2911 * Name of production rule which failed to parse, 2912 * or specific error message 2913 */ 2914 string msg; 2915 size_t line = 0; /// Line number at which parse failure occurred 2916 size_t column = 0; /// Column number at which parse failure occurred 2917 2918 private this(string tail,string msg,Err err=null) @safe pure 2919 { 2920 super(null); 2921 this.tail = tail; 2922 this.msg = msg; 2923 this.err = err; 2924 } 2925 2926 private void complete(string entire) @safe pure 2927 { 2928 import std.string : count, lastIndexOf; 2929 import std.utf : toUTF32; 2930 2931 string head = entire[0..$-tail.length]; 2932 ptrdiff_t n = head.lastIndexOf('\n') + 1; 2933 line = head.count("\n") + 1; 2934 dstring t = toUTF32(head[n..$]); 2935 column = t.length + 1; 2936 if (err !is null) err.complete(entire); 2937 } 2938 2939 override string toString() const @safe pure 2940 { 2941 import std.format : format; 2942 2943 string s; 2944 if (line != 0) s = format("Line %d, column %d: ",line,column); 2945 s ~= msg; 2946 s ~= '\n'; 2947 if (err !is null) s = err.toString() ~ s; 2948 return s; 2949 } 2950 } 2951 2952 private alias Err = CheckException; 2953 
// Private helper functions

private
{
    // Downcasts `o` to T. Throws InvalidTypeException when `o` is not a T
    // (the cast yields null).
    inout(T) toType(T)(inout return scope Object o)
    {
        auto converted = cast(T)(o);
        if (converted !is null)
            return converted;

        throw new InvalidTypeException("Attempt to compare a "
            ~ T.stringof ~ " with an instance of another type");
    }

    // Splits off and returns the first `n` code units of `s`, leaving the
    // remainder in `s`. Passing -1 (i.e. size_t.max) takes the whole string.
    string chop(ref string s, size_t n) @safe pure nothrow
    {
        immutable size_t cut = (n == size_t.max) ? s.length : n;
        immutable string front = s[0 .. cut];
        s = s[cut .. $];
        return front;
    }

    // Consumes one leading `c` from `s` if present; reports whether it did.
    bool optc(ref string s, char c) @safe pure nothrow
    {
        if (s.length == 0 || s[0] != c)
            return false;

        s = s[1 .. $];
        return true;
    }

    // Consumes one leading `c` from `s`; throws TagException if `s` is empty
    // or starts with anything else.
    void reqc(ref string s, char c) @safe pure
    {
        if (s.length != 0 && s[0] == c)
        {
            s = s[1 .. $];
            return;
        }
        throw new TagException("");
    }

    // Consumes and returns the first code unit of `s`, which must be one of
    // `chars`; throws TagException otherwise (including for empty input).
    char requireOneOf(ref string s, string chars) @safe pure
    {
        import std.string : indexOf;

        if (s.length != 0)
        {
            immutable char first = s[0];
            if (indexOf(chars,first) != -1)
            {
                s = s[1 .. $];
                return first;
            }
        }
        throw new TagException("");
    }

    alias hash = .hashOf;

    // Definitions from the XML specification
    //
    // Each table is a flat list of inclusive (low, high) code point pairs,
    // which is the layout lookup() below searches.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];

    // Binary search over a table of inclusive (low, high) pairs stored flat:
    // returns true when `c` lies inside one of the ranges. The halving only
    // works if the pairs are in ascending order.
    bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
    {
        for (auto rest = table; rest.length != 0; )
        {
            // Midpoint rounded down to an even index, so that it always
            // lands on the low end of a pair.
            immutable mid = (rest.length >> 1) & ~1;

            if (c < rest[mid])
                rest = rest[0 .. mid];
            else if (c > rest[mid + 1])
                rest = rest[mid + 2 .. $];
            else
                return true;
        }
        return false;
    }

    // Builds a short preview of `s`: code units outside 0x20 .. 0x7F are
    // replaced by '.', and once 40 code units have been produced "___" is
    // appended and the rest of `s` is dropped.
    string startOf(string s) @safe nothrow pure
    {
        string preview;
        foreach (char unit; s)
        {
            immutable bool keep = unit >= 0x20 && unit <= 0x7F;
            preview ~= keep ? unit : '.';
            if (preview.length >= 40)
            {
                preview ~= "___";
                break;
            }
        }
        return preview;
    }

    // Aborts by throwing an XMLException carrying the message `s`.
    void exit(string s=null)
    {
        throw new XMLException(s);
    }
}