1 // Written in the D programming language. 2 3 /** 4 $(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will remain until we have a suitable replacement, 6 but be aware that it will not remain long term.) 7 8 Classes and functions for creating and parsing XML 9 10 The basic architecture of this module is that there are standalone functions, 11 classes for constructing an XML document from scratch (Tag, Element and 12 Document), and also classes for parsing a pre-existing XML file (ElementParser 13 and DocumentParser). The parsing classes <i>may</i> be used to build a 14 Document, but that is not their primary purpose. The handling capabilities of 15 DocumentParser and ElementParser are sufficiently customizable that you can 16 make them do pretty much whatever you want. 17 18 Example: This example creates a DOM (Document Object Model) tree 19 from an XML file. 20 ------------------------------------------------------------------------------ 21 import std.xml; 22 import std.stdio; 23 import std.string; 24 import std.file; 25 26 // books.xml is used in various samples throughout the Microsoft XML Core 27 // Services (MSXML) SDK. 28 // 29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 30 31 void main() 32 { 33 string s = cast(string) std.file.read("books.xml"); 34 35 // Check for well-formedness 36 check(s); 37 38 // Make a DOM tree 39 auto doc = new Document(s); 40 41 // Plain-print it 42 writeln(doc); 43 } 44 ------------------------------------------------------------------------------ 45 46 Example: This example does much the same thing, except that the file is 47 deconstructed and reconstructed by hand. This is more work, but the 48 techniques involved offer vastly more power. 49 ------------------------------------------------------------------------------ 50 import std.xml; 51 import std.stdio; 52 import std.string; 53 54 struct Book 55 { 56 string id; 57 string author; 58 string title; 59 string genre; 60 string price; 61 string pubDate; 62 string description; 63 } 64 65 void main() 66 { 67 string s = cast(string) std.file.read("books.xml"); 68 69 // Check for well-formedness 70 check(s); 71 72 // Take it apart 73 Book[] books; 74 75 auto xml = new DocumentParser(s); 76 xml.onStartTag["book"] = (ElementParser xml) 77 { 78 Book book; 79 book.id = xml.tag.attr["id"]; 80 81 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); }; 82 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); }; 83 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); }; 84 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); }; 85 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); }; 86 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); }; 87 88 xml.parse(); 89 90 books ~= book; 91 }; 92 xml.parse(); 93 94 // Put it back together again; 95 auto doc = new Document(new Tag("catalog")); 96 foreach (book;books) 97 { 98 auto element = new Element("book"); 99 element.tag.attr["id"] = book.id; 100 101 element ~= new Element("author", book.author); 102 element ~= new Element("title", book.title); 103 element ~= new Element("genre", book.genre); 104 element ~= new Element("price", book.price); 105 element ~= new Element("publish-date",book.pubDate); 106 element ~= new Element("description", book.description); 107 108 doc ~= element; 109 } 110 111 // Pretty-print it 112 writefln(join(doc.pretty(3),"\n")); 113 } 114 ------------------------------------------------------------------------------- 115 Copyright: Copyright Janice Caron 2008 - 2009. 116 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 117 Authors: Janice Caron 118 Source: $(PHOBOSSRC std/_xml.d) 119 */ 120 /* 121 Copyright Janice Caron 2008 - 2009. 122 Distributed under the Boost Software License, Version 1.0. 123 (See accompanying file LICENSE_1_0.txt or copy at 124 http://www.boost.org/LICENSE_1_0.txt) 125 */ 126 module orange.xml.PhobosXml; 127 128 enum cdata = "<![CDATA["; 129 130 /** 131 * Returns true if the character is a character according to the XML standard 132 * 133 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 134 * 135 * Params: 136 * c = the character to be tested 137 */ 138 bool isChar(dchar c) @safe @nogc pure nothrow // rule 2 139 { 140 if (c <= 0xD7FF) 141 { 142 if (c >= 0x20) 143 return true; 144 switch (c) 145 { 146 case 0xA: 147 case 0x9: 148 case 0xD: 149 return true; 150 default: 151 return false; 152 } 153 } 154 else if (0xE000 <= c && c <= 0x10FFFF) 155 { 156 if ((c & 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF 157 return true; 158 } 159 return false; 160 } 161 162 @safe @nogc nothrow pure unittest 163 { 164 assert(!isChar(cast(dchar) 0x8)); 165 assert( isChar(cast(dchar) 0x9)); 166 assert( isChar(cast(dchar) 0xA)); 167 assert(!isChar(cast(dchar) 0xB)); 168 assert(!isChar(cast(dchar) 0xC)); 169 assert( isChar(cast(dchar) 0xD)); 170 assert(!isChar(cast(dchar) 0xE)); 171 assert(!isChar(cast(dchar) 0x1F)); 172 assert( isChar(cast(dchar) 0x20)); 173 assert( isChar('J')); 174 assert( isChar(cast(dchar) 0xD7FF)); 175 assert(!isChar(cast(dchar) 0xD800)); 176 assert(!isChar(cast(dchar) 0xDFFF)); 177 assert( isChar(cast(dchar) 0xE000)); 178 assert( isChar(cast(dchar) 0xFFFD)); 179 assert(!isChar(cast(dchar) 0xFFFE)); 180 assert(!isChar(cast(dchar) 0xFFFF)); 181 assert( isChar(cast(dchar) 0x10000)); 182 assert( isChar(cast(dchar) 0x10FFFF)); 183 assert(!isChar(cast(dchar) 0x110000)); 184 185 debug (stdxml_TestHardcodedChecks) 186 { 187 foreach (c; 0 .. dchar.max + 1) 188 assert(isChar(c) == lookup(CharTable, c)); 189 } 190 } 191 192 /** 193 * Returns true if the character is whitespace according to the XML standard 194 * 195 * Only the following characters are considered whitespace in XML - space, tab, 196 * carriage return and linefeed 197 * 198 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 199 * 200 * Params: 201 * c = the character to be tested 202 */ 203 bool isSpace(dchar c) @safe @nogc pure nothrow 204 { 205 return c == '\u0020' || c == '\u0009' || c == '\u000A' || c == '\u000D'; 206 } 207 208 /** 209 * Returns true if the character is a digit according to the XML standard 210 * 211 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 212 * 213 * Params: 214 * c = the character to be tested 215 */ 216 bool isDigit(dchar c) @safe @nogc pure nothrow 217 { 218 if (c <= 0x0039 && c >= 0x0030) 219 return true; 220 else 221 return lookup(DigitTable,c); 222 } 223 224 @safe @nogc nothrow pure unittest 225 { 226 debug (stdxml_TestHardcodedChecks) 227 { 228 foreach (c; 0 .. dchar.max + 1) 229 assert(isDigit(c) == lookup(DigitTable, c)); 230 } 231 } 232 233 /** 234 * Returns true if the character is a letter according to the XML standard 235 * 236 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 237 * 238 * Params: 239 * c = the character to be tested 240 */ 241 bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84 242 { 243 return isIdeographic(c) || isBaseChar(c); 244 } 245 246 /** 247 * Returns true if the character is an ideographic character according to the 248 * XML standard 249 * 250 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 251 * 252 * Params: 253 * c = the character to be tested 254 */ 255 bool isIdeographic(dchar c) @safe @nogc nothrow pure 256 { 257 if (c == 0x3007) 258 return true; 259 if (c <= 0x3029 && c >= 0x3021 ) 260 return true; 261 if (c <= 0x9FA5 && c >= 0x4E00) 262 return true; 263 return false; 264 } 265 266 @safe @nogc nothrow pure unittest 267 { 268 assert(isIdeographic('\u4E00')); 269 assert(isIdeographic('\u9FA5')); 270 assert(isIdeographic('\u3007')); 271 assert(isIdeographic('\u3021')); 272 assert(isIdeographic('\u3029')); 273 274 debug (stdxml_TestHardcodedChecks) 275 { 276 foreach (c; 0 .. dchar.max + 1) 277 assert(isIdeographic(c) == lookup(IdeographicTable, c)); 278 } 279 } 280 281 /** 282 * Returns true if the character is a base character according to the XML 283 * standard 284 * 285 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 286 * 287 * Params: 288 * c = the character to be tested 289 */ 290 bool isBaseChar(dchar c) @safe @nogc nothrow pure 291 { 292 return lookup(BaseCharTable,c); 293 } 294 295 /** 296 * Returns true if the character is a combining character according to the 297 * XML standard 298 * 299 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 300 * 301 * Params: 302 * c = the character to be tested 303 */ 304 bool isCombiningChar(dchar c) @safe @nogc nothrow pure 305 { 306 return lookup(CombiningCharTable,c); 307 } 308 309 /** 310 * Returns true if the character is an extender according to the XML standard 311 * 312 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 313 * 314 * Params: 315 * c = the character to be tested 316 */ 317 bool isExtender(dchar c) @safe @nogc nothrow pure 318 { 319 return lookup(ExtenderTable,c); 320 } 321 322 /** 323 * Encodes a string by replacing all characters which need to be escaped with 324 * appropriate predefined XML entities. 325 * 326 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than 327 * and greater-than), and similarly, decode() unescapes them. These functions 328 * are provided for convenience only. You do not need to use them when using 329 * the std.xml classes, because then all the encoding and decoding will be done 330 * for you automatically. 331 * 332 * If the string is not modified, the original will be returned. 333 * 334 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 335 * 336 * Params: 337 * s = The string to be encoded 338 * 339 * Returns: The encoded string 340 * 341 * Example: 342 * -------------- 343 * writefln(encode("a > b")); // writes "a > b" 344 * -------------- 345 */ 346 S encode(S)(S s) 347 { 348 import std.array : appender; 349 350 string r; 351 size_t lastI; 352 auto result = appender!S(); 353 354 foreach (i, c; s) 355 { 356 switch (c) 357 { 358 case '&': r = "&"; break; 359 case '"': r = """; break; 360 case '\'': r = "'"; break; 361 case '<': r = "<"; break; 362 case '>': r = ">"; break; 363 default: continue; 364 } 365 // Replace with r 366 result.put(s[lastI .. i]); 367 result.put(r); 368 lastI = i + 1; 369 } 370 371 if (!result.data.ptr) return s; 372 result.put(s[lastI .. $]); 373 return result.data; 374 } 375 376 @safe pure unittest 377 { 378 auto s = "hello"; 379 assert(encode(s) is s); 380 assert(encode("a > b") == "a > b", encode("a > b")); 381 assert(encode("a < b") == "a < b"); 382 assert(encode("don't") == "don't"); 383 assert(encode("\"hi\"") == ""hi"", encode("\"hi\"")); 384 assert(encode("cat & dog") == "cat & dog"); 385 } 386 387 /** 388 * Mode to use for decoding. 389 * 390 * $(DDOC_ENUM_MEMBERS NONE) Do not decode 391 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors 392 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error 393 */ 394 enum DecodeMode 395 { 396 NONE, LOOSE, STRICT 397 } 398 399 /** 400 * Decodes a string by unescaping all predefined XML entities. 401 * 402 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than 403 * and greater-than), and similarly, decode() unescapes them. These functions 404 * are provided for convenience only. You do not need to use them when using 405 * the std.xml classes, because then all the encoding and decoding will be done 406 * for you automatically. 407 * 408 * This function decodes the entities &amp;, &quot;, &apos;, 409 * &lt; and &gt, 410 * as well as decimal and hexadecimal entities such as &#x20AC; 411 * 412 * If the string does not contain an ampersand, the original will be returned. 413 * 414 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not 415 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT 416 * (decode, and throw a DecodeException in the event of an error). 417 * 418 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 419 * 420 * Params: 421 * s = The string to be decoded 422 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 423 * 424 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails 425 * 426 * Returns: The decoded string 427 * 428 * Example: 429 * -------------- 430 * writefln(decode("a > b")); // writes "a > b" 431 * -------------- 432 */ 433 string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure 434 { 435 import std.algorithm.searching : startsWith; 436 437 if (mode == DecodeMode.NONE) return s; 438 439 string buffer; 440 foreach (ref i; 0 .. s.length) 441 { 442 char c = s[i]; 443 if (c != '&') 444 { 445 if (buffer.length != 0) buffer ~= c; 446 } 447 else 448 { 449 if (buffer.length == 0) 450 { 451 buffer = s[0 .. i].dup; 452 } 453 if (startsWith(s[i..$],"&#")) 454 { 455 try 456 { 457 dchar d; 458 string t = s[i..$]; 459 checkCharRef(t, d); 460 char[4] temp; 461 import std.utf : encode; 462 buffer ~= temp[0 .. encode(temp, d)]; 463 i = s.length - t.length - 1; 464 } 465 catch (Err e) 466 { 467 if (mode == DecodeMode.STRICT) 468 throw new DecodeException("Unescaped &"); 469 buffer ~= '&'; 470 } 471 } 472 else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; } 473 else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; } 474 else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; } 475 else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; } 476 else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; } 477 else 478 { 479 if (mode == DecodeMode.STRICT) 480 throw new DecodeException("Unescaped &"); 481 buffer ~= '&'; 482 } 483 } 484 } 485 return (buffer.length == 0) ? s : buffer; 486 } 487 488 @safe pure unittest 489 { 490 void assertNot(string s) pure 491 { 492 bool b = false; 493 try { decode(s,DecodeMode.STRICT); } 494 catch (DecodeException e) { b = true; } 495 assert(b,s); 496 } 497 498 // Assert that things that should work, do 499 auto s = "hello"; 500 assert(decode(s, DecodeMode.STRICT) is s); 501 assert(decode("a > b", DecodeMode.STRICT) == "a > b"); 502 assert(decode("a < b", DecodeMode.STRICT) == "a < b"); 503 assert(decode("don't", DecodeMode.STRICT) == "don't"); 504 assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\""); 505 assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog"); 506 assert(decode("*", DecodeMode.STRICT) == "*"); 507 assert(decode("*", DecodeMode.STRICT) == "*"); 508 assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog"); 509 assert(decode("a > b", DecodeMode.LOOSE) == "a > b"); 510 assert(decode("&#;", DecodeMode.LOOSE) == "&#;"); 511 assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;"); 512 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 513 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 514 515 // Assert that things that shouldn't work, don't 516 assertNot("cat & dog"); 517 assertNot("a > b"); 518 assertNot("&#;"); 519 assertNot("&#x;"); 520 assertNot("G;"); 521 assertNot("G;"); 522 } 523 524 /** 525 * Class representing an XML document. 526 * 527 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 528 * 529 */ 530 class Document : Element 531 { 532 /** 533 * Contains all text which occurs before the root element. 534 * Defaults to <?xml version="1.0"?> 535 */ 536 string prolog = "<?xml version=\"1.0\"?>"; 537 /** 538 * Contains all text which occurs after the root element. 539 * Defaults to the empty string 540 */ 541 string epilog; 542 543 /** 544 * Constructs a Document by parsing XML text. 545 * 546 * This function creates a complete DOM (Document Object Model) tree. 547 * 548 * The input to this function MUST be valid XML. 549 * This is enforced by DocumentParser's in contract. 550 * 551 * Params: 552 * s = the complete XML text. 553 */ 554 this(string s) 555 in 556 { 557 assert(s.length != 0); 558 } 559 body 560 { 561 auto xml = new DocumentParser(s); 562 string tagString = xml.tag.tagString; 563 564 this(xml.tag); 565 prolog = s[0 .. tagString.ptr - s.ptr]; 566 parse(xml); 567 epilog = *xml.s; 568 } 569 570 /** 571 * Constructs a Document from a Tag. 572 * 573 * Params: 574 * tag = the start tag of the document. 575 */ 576 this(const(Tag) tag) 577 { 578 super(tag); 579 } 580 581 const 582 { 583 /** 584 * Compares two Documents for equality 585 * 586 * Example: 587 * -------------- 588 * Document d1,d2; 589 * if (d1 == d2) { } 590 * -------------- 591 */ 592 override bool opEquals(scope const Object o) const 593 { 594 const doc = toType!(const Document)(o); 595 return prolog == doc.prolog 596 && (cast(const) this).Element.opEquals(cast(const) doc) 597 && epilog == doc.epilog; 598 } 599 600 /** 601 * Compares two Documents 602 * 603 * You should rarely need to call this function. It exists so that 604 * Documents can be used as associative array keys. 605 * 606 * Example: 607 * -------------- 608 * Document d1,d2; 609 * if (d1 < d2) { } 610 * -------------- 611 */ 612 override int opCmp(scope const Object o) scope const 613 { 614 const doc = toType!(const Document)(o); 615 if (prolog != doc.prolog) 616 return prolog < doc.prolog ? -1 : 1; 617 if (int cmp = this.Element.opCmp(doc)) 618 return cmp; 619 if (epilog != doc.epilog) 620 return epilog < doc.epilog ? -1 : 1; 621 return 0; 622 } 623 624 /** 625 * Returns the hash of a Document 626 * 627 * You should rarely need to call this function. It exists so that 628 * Documents can be used as associative array keys. 629 */ 630 override size_t toHash() scope const @trusted 631 { 632 return hash(prolog, hash(epilog, (cast() this).Element.toHash())); 633 } 634 635 /** 636 * Returns the string representation of a Document. (That is, the 637 * complete XML of a document). 638 */ 639 override string toString() scope const @safe 640 { 641 return prolog ~ super.toString() ~ epilog; 642 } 643 } 644 } 645 646 @system unittest 647 { 648 // https://issues.dlang.org/show_bug.cgi?id=14966 649 auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`; 650 651 auto a = new Document(xml); 652 auto b = new Document(xml); 653 assert(a == b); 654 assert(!(a < b)); 655 int[Document] aa; 656 aa[a] = 1; 657 assert(aa[b] == 1); 658 659 b ~= new Element("b"); 660 assert(a < b); 661 assert(b > a); 662 } 663 664 /** 665 * Class representing an XML element. 666 * 667 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 668 */ 669 class Element : Item 670 { 671 Tag tag; /// The start tag of the element 672 Item[] items; /// The element's items 673 Text[] texts; /// The element's text items 674 CData[] cdatas; /// The element's CData items 675 Comment[] comments; /// The element's comments 676 ProcessingInstruction[] pis; /// The element's processing instructions 677 Element[] elements; /// The element's child elements 678 Element parent; 679 680 string name () 681 { 682 return tag.name; 683 } 684 685 string value () 686 { 687 return text; 688 } 689 690 /** 691 * Constructs an Element given a name and a string to be used as a Text 692 * interior. 693 * 694 * Params: 695 * name = the name of the element. 696 * interior = (optional) the string interior. 697 * 698 * Example: 699 * ------------------------------------------------------- 700 * auto element = new Element("title","Serenity") 701 * // constructs the element <title>Serenity</title> 702 * ------------------------------------------------------- 703 */ 704 this(string name, string interior=null) @safe pure 705 { 706 this(new Tag(name)); 707 if (interior.length != 0) this ~= new Text(interior); 708 } 709 710 /** 711 * Constructs an Element from a Tag. 712 * 713 * Params: 714 * tag_ = the start or empty tag of the element. 715 */ 716 this(const(Tag) tag_) @safe pure 717 { 718 this.tag = new Tag(tag_.name); 719 tag.type = TagType.EMPTY; 720 foreach (k,v;tag_.attr) tag.attr[k] = v; 721 tag.tagString = tag_.tagString; 722 } 723 724 /** 725 * Append a text item to the interior of this element 726 * 727 * Params: 728 * item = the item you wish to append. 729 * 730 * Example: 731 * -------------- 732 * Element element; 733 * element ~= new Text("hello"); 734 * -------------- 735 */ 736 void opOpAssign(string op : "~")(Text item) @safe pure 737 { 738 texts ~= item; 739 appendItem(item); 740 } 741 742 /** 743 * Append a CData item to the interior of this element 744 * 745 * Params: 746 * item = the item you wish to append. 747 * 748 * Example: 749 * -------------- 750 * Element element; 751 * element ~= new CData("hello"); 752 * -------------- 753 */ 754 void opOpAssign(string op : "~")(CData item) @safe pure 755 { 756 cdatas ~= item; 757 appendItem(item); 758 } 759 760 /** 761 * Append a comment to the interior of this element 762 * 763 * Params: 764 * item = the item you wish to append. 765 * 766 * Example: 767 * -------------- 768 * Element element; 769 * element ~= new Comment("hello"); 770 * -------------- 771 */ 772 void opOpAssign(string op : "~")(Comment item) @safe pure 773 { 774 comments ~= item; 775 appendItem(item); 776 } 777 778 /** 779 * Append a processing instruction to the interior of this element 780 * 781 * Params: 782 * item = the item you wish to append. 783 * 784 * Example: 785 * -------------- 786 * Element element; 787 * element ~= new ProcessingInstruction("hello"); 788 * -------------- 789 */ 790 void opOpAssign(string op : "~")(ProcessingInstruction item) @safe pure 791 { 792 pis ~= item; 793 appendItem(item); 794 } 795 796 /** 797 * Append a complete element to the interior of this element 798 * 799 * Params: 800 * item = the item you wish to append. 801 * 802 * Example: 803 * -------------- 804 * Element element; 805 * Element other = new Element("br"); 806 * element ~= other; 807 * // appends element representing <br /> 808 * -------------- 809 */ 810 void opOpAssign(string op : "~")(Element item) @safe pure 811 { 812 elements ~= item; 813 appendItem(item); 814 } 815 816 private void appendItem(Item item) @safe pure 817 { 818 items ~= item; 819 if (tag.type == TagType.EMPTY && !item.isEmptyXML) 820 tag.type = TagType.START; 821 } 822 823 private void parse(ElementParser xml) 824 { 825 xml.onText = (string s) { this ~= new Text(s); }; 826 xml.onCData = (string s) { this ~= new CData(s); }; 827 xml.onComment = (string s) { this ~= new Comment(s); }; 828 xml.onPI = (string s) { this ~= new ProcessingInstruction(s); }; 829 830 xml.onStartTag[null] = (ElementParser xml) 831 { 832 auto e = new Element(xml.tag); 833 e.parse(xml); 834 this ~= e; 835 }; 836 837 xml.parse(); 838 } 839 840 /** 841 * Compares two Elements for equality 842 * 843 * Example: 844 * -------------- 845 * Element e1,e2; 846 * if (e1 == e2) { } 847 * -------------- 848 */ 849 override bool opEquals(scope const Object o) const 850 { 851 const element = toType!(const Element)(o); 852 immutable len = items.length; 853 if (len != element.items.length) return false; 854 foreach (i; 0 .. len) 855 { 856 if (!items[i].opEquals(element.items[i])) return false; 857 } 858 return true; 859 } 860 861 /** 862 * Compares two Elements 863 * 864 * You should rarely need to call this function. It exists so that Elements 865 * can be used as associative array keys. 866 * 867 * Example: 868 * -------------- 869 * Element e1,e2; 870 * if (e1 < e2) { } 871 * -------------- 872 */ 873 override int opCmp(scope const Object o) @safe const 874 { 875 const element = toType!(const Element)(o); 876 for (uint i=0; ; ++i) 877 { 878 if (i == items.length && i == element.items.length) return 0; 879 if (i == items.length) return -1; 880 if (i == element.items.length) return 1; 881 if (!items[i].opEquals(element.items[i])) 882 return items[i].opCmp(element.items[i]); 883 } 884 } 885 886 /** 887 * Returns the hash of an Element 888 * 889 * You should rarely need to call this function. It exists so that Elements 890 * can be used as associative array keys. 891 */ 892 override size_t toHash() scope const @safe 893 { 894 size_t hash = tag.toHash(); 895 foreach (item;items) hash += item.toHash(); 896 return hash; 897 } 898 899 const 900 { 901 /** 902 * Returns the decoded interior of an element. 903 * 904 * The element is assumed to contain text <i>only</i>. So, for 905 * example, given XML such as "<title>Good &amp; 906 * Bad</title>", will return "Good & Bad". 907 * 908 * Params: 909 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 910 * 911 * Throws: DecodeException if decode fails 912 */ 913 string text(DecodeMode mode=DecodeMode.LOOSE) 914 { 915 string buffer; 916 foreach (item;items) 917 { 918 Text t = cast(Text) item; 919 if (t is null) throw new DecodeException(item.toString()); 920 buffer ~= decode(t.toString(),mode); 921 } 922 return buffer; 923 } 924 925 /** 926 * Returns an indented string representation of this item 927 * 928 * Params: 929 * indent = (optional) number of spaces by which to indent this 930 * element. Defaults to 2. 931 */ 932 override string[] pretty(uint indent=2) scope 933 { 934 import std.algorithm.searching : count; 935 import std..string : rightJustify; 936 937 if (isEmptyXML) return [ tag.toEmptyString() ]; 938 939 if (items.length == 1) 940 { 941 auto t = cast(const(Text))(items[0]); 942 if (t !is null) 943 { 944 return [tag.toStartString() ~ t.toString() ~ tag.toEndString()]; 945 } 946 } 947 948 string[] a = [ tag.toStartString() ]; 949 foreach (item;items) 950 { 951 string[] b = item.pretty(indent); 952 foreach (s;b) 953 { 954 a ~= rightJustify(s,count(s) + indent); 955 } 956 } 957 a ~= tag.toEndString(); 958 return a; 959 } 960 961 /** 962 * Returns the string representation of an Element 963 * 964 * Example: 965 * -------------- 966 * auto element = new Element("br"); 967 * writefln(element.toString()); // writes "<br />" 968 * -------------- 969 */ 970 override string toString() scope @safe 971 { 972 if (isEmptyXML) return tag.toEmptyString(); 973 974 string buffer = tag.toStartString(); 975 foreach (item;items) { buffer ~= item.toString(); } 976 buffer ~= tag.toEndString(); 977 return buffer; 978 } 979 980 override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; } 981 } 982 } 983 984 /** 985 * Tag types. 986 * 987 * $(DDOC_ENUM_MEMBERS START) Used for start tags 988 * $(DDOC_ENUM_MEMBERS END) Used for end tags 989 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags 990 * 991 */ 992 enum TagType { START, END, EMPTY } 993 994 /** 995 * Class representing an XML tag. 996 * 997 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 998 * 999 * The class invariant guarantees 1000 * <ul> 1001 * <li> that $(B type) is a valid enum TagType value</li> 1002 * <li> that $(B name) consists of valid characters</li> 1003 * <li> that each attribute name consists of valid characters</li> 1004 * </ul> 1005 */ 1006 class Tag 1007 { 1008 TagType type = TagType.START; /// Type of tag 1009 string name; /// Tag name 1010 string[string] attr; /// Associative array of attributes 1011 private string tagString; 1012 1013 invariant() 1014 { 1015 string s; 1016 string t; 1017 1018 assert(type == TagType.START 1019 || type == TagType.END 1020 || type == TagType.EMPTY); 1021 1022 s = name; 1023 try { checkName(s,t); } 1024 catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); } 1025 1026 foreach (k,v;attr) 1027 { 1028 s = k; 1029 try { checkName(s,t); } 1030 catch (Err e) 1031 { assert(false,"Invalid atrribute name:" ~ e.toString()); } 1032 } 1033 } 1034 1035 /** 1036 * Constructs an instance of Tag with a specified name and type 1037 * 1038 * The constructor does not initialize the attributes. To initialize the 1039 * attributes, you access the $(B attr) member variable. 1040 * 1041 * Params: 1042 * name = the Tag's name 1043 * type = (optional) the Tag's type. If omitted, defaults to 1044 * TagType.START. 1045 * 1046 * Example: 1047 * -------------- 1048 * auto tag = new Tag("img",Tag.EMPTY); 1049 * tag.attr["src"] = "http://example.com/example.jpg"; 1050 * -------------- 1051 */ 1052 this(string name, TagType type=TagType.START) @safe pure 1053 { 1054 this.name = name; 1055 this.type = type; 1056 } 1057 1058 /* Private constructor (so don't ddoc this!) 1059 * 1060 * Constructs a Tag by parsing the string representation, e.g. "<html>". 1061 * 1062 * The string is passed by reference, and is advanced over all characters 1063 * consumed. 1064 * 1065 * The second parameter is a dummy parameter only, required solely to 1066 * distinguish this constructor from the public one. 1067 */ 1068 private this(ref string s, bool dummy) @safe pure 1069 { 1070 import std.algorithm.searching : countUntil; 1071 import std.ascii : isWhite; 1072 import std.utf : byCodeUnit; 1073 1074 tagString = s; 1075 try 1076 { 1077 reqc(s,'<'); 1078 if (optc(s,'/')) type = TagType.END; 1079 ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"); 1080 name = s[0 .. i]; 1081 s = s[i .. $]; 1082 1083 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1084 s = s[i .. $]; 1085 1086 while (s.length > 0 && s[0] != '>' && s[0] != '/') 1087 { 1088 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"); 1089 string key = s[0 .. i]; 1090 s = s[i .. $]; 1091 1092 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1093 s = s[i .. $]; 1094 reqc(s,'='); 1095 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1096 s = s[i .. $]; 1097 1098 immutable char quote = requireOneOf(s,"'\""); 1099 i = s.byCodeUnit.countUntil(quote); 1100 string val = decode(s[0 .. i], DecodeMode.LOOSE); 1101 s = s[i .. $]; 1102 reqc(s,quote); 1103 1104 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1105 s = s[i .. $]; 1106 attr[key] = val; 1107 } 1108 if (optc(s,'/')) 1109 { 1110 if (type == TagType.END) throw new TagException(""); 1111 type = TagType.EMPTY; 1112 } 1113 reqc(s,'>'); 1114 tagString.length = tagString.length - s.length; 1115 } 1116 catch (XMLException e) 1117 { 1118 tagString.length = tagString.length - s.length; 1119 throw new TagException(tagString); 1120 } 1121 } 1122 1123 const 1124 { 1125 /** 1126 * Compares two Tags for equality 1127 * 1128 * You should rarely need to call this function. It exists so that Tags 1129 * can be used as associative array keys. 1130 * 1131 * Example: 1132 * -------------- 1133 * Tag tag1,tag2 1134 * if (tag1 == tag2) { } 1135 * -------------- 1136 */ 1137 override bool opEquals(scope Object o) 1138 { 1139 const tag = toType!(const Tag)(o); 1140 return 1141 (name != tag.name) ? false : ( 1142 (attr != tag.attr) ? false : ( 1143 (type != tag.type) ? false : ( 1144 true ))); 1145 } 1146 1147 /** 1148 * Compares two Tags 1149 * 1150 * Example: 1151 * -------------- 1152 * Tag tag1,tag2 1153 * if (tag1 < tag2) { } 1154 * -------------- 1155 */ 1156 override int opCmp(Object o) 1157 { 1158 const tag = toType!(const Tag)(o); 1159 // Note that attr is an AA, so the comparison is nonsensical (bug 10381) 1160 return 1161 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) : 1162 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) : 1163 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) : 1164 0 ))); 1165 } 1166 1167 /** 1168 * Returns the hash of a Tag 1169 * 1170 * You should rarely need to call this function. It exists so that Tags 1171 * can be used as associative array keys. 1172 */ 1173 override size_t toHash() 1174 { 1175 return typeid(name).getHash(&name); 1176 } 1177 1178 /** 1179 * Returns the string representation of a Tag 1180 * 1181 * Example: 1182 * -------------- 1183 * auto tag = new Tag("book",TagType.START); 1184 * writefln(tag.toString()); // writes "<book>" 1185 * -------------- 1186 */ 1187 override string toString() @safe 1188 { 1189 if (isEmpty) return toEmptyString(); 1190 return (isEnd) ? toEndString() : toStartString(); 1191 } 1192 1193 private 1194 { 1195 string toNonEndString() @safe 1196 { 1197 import std.format : format; 1198 1199 string s = "<" ~ name; 1200 foreach (key,val;attr) 1201 s ~= format(" %s=\"%s\"",key,encode(val)); 1202 return s; 1203 } 1204 1205 string toStartString() @safe { return toNonEndString() ~ ">"; } 1206 1207 string toEndString() @safe { return "</" ~ name ~ ">"; } 1208 1209 string toEmptyString() @safe { return toNonEndString() ~ " />"; } 1210 } 1211 1212 /** 1213 * Returns true if the Tag is a start tag 1214 * 1215 * Example: 1216 * -------------- 1217 * if (tag.isStart) { } 1218 * -------------- 1219 */ 1220 @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; } 1221 1222 /** 1223 * Returns true if the Tag is an end tag 1224 * 1225 * Example: 1226 * -------------- 1227 * if (tag.isEnd) { } 1228 * -------------- 1229 */ 1230 @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; } 1231 1232 /** 1233 * Returns true if the Tag is an empty tag 1234 * 1235 * Example: 1236 * -------------- 1237 * if (tag.isEmpty) { } 1238 * -------------- 1239 */ 1240 @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; } 1241 } 1242 } 1243 1244 /** 1245 * Class representing a comment 1246 */ 1247 class Comment : Item 1248 { 1249 private string content; 1250 1251 /** 1252 * Construct a comment 1253 * 1254 * Params: 1255 * content = the body of the comment 1256 * 1257 * Throws: CommentException if the comment body is illegal (contains "--" 1258 * or exactly equals "-") 1259 * 1260 * Example: 1261 * -------------- 1262 * auto item = new Comment("This is a comment"); 1263 * // constructs <!--This is a comment--> 1264 * -------------- 1265 */ 1266 this(string content) @safe pure 1267 { 1268 import std..string : indexOf; 1269 1270 if (content == "-" || content.indexOf("--") != -1) 1271 throw new CommentException(content); 1272 this.content = content; 1273 } 1274 1275 /** 1276 * Compares two comments for equality 1277 * 1278 * Example: 1279 * -------------- 1280 * Comment item1,item2; 1281 * if (item1 == item2) { } 1282 * -------------- 1283 */ 1284 override bool opEquals(scope const Object o) const 1285 { 1286 const item = toType!(const Item)(o); 1287 const t = cast(const Comment) item; 1288 return t !is null && content == t.content; 1289 } 1290 1291 /** 1292 * Compares two comments 1293 * 1294 * You should rarely need to call this function. It exists so that Comments 1295 * can be used as associative array keys. 1296 * 1297 * Example: 1298 * -------------- 1299 * Comment item1,item2; 1300 * if (item1 < item2) { } 1301 * -------------- 1302 */ 1303 override int opCmp(scope const Object o) scope const 1304 { 1305 const item = toType!(const Item)(o); 1306 const t = cast(const Comment) item; 1307 return t !is null && (content != t.content 1308 ? (content < t.content ? -1 : 1 ) : 0 ); 1309 } 1310 1311 /** 1312 * Returns the hash of a Comment 1313 * 1314 * You should rarely need to call this function. It exists so that Comments 1315 * can be used as associative array keys. 1316 */ 1317 override size_t toHash() scope const nothrow { return hash(content); } 1318 1319 /** 1320 * Returns a string representation of this comment 1321 */ 1322 override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; } 1323 1324 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always 1325 } 1326 1327 @safe unittest // issue 16241 1328 { 1329 import std.exception : assertThrown; 1330 auto c = new Comment("=="); 1331 assert(c.content == "=="); 1332 assertThrown!CommentException(new Comment("--")); 1333 } 1334 1335 /** 1336 * Class representing a Character Data section 1337 */ 1338 class CData : Item 1339 { 1340 private string content; 1341 1342 /** 1343 * Construct a character data section 1344 * 1345 * Params: 1346 * content = the body of the character data segment 1347 * 1348 * Throws: CDataException if the segment body is illegal (contains "]]>") 1349 * 1350 * Example: 1351 * -------------- 1352 * auto item = new CData("<b>hello</b>"); 1353 * // constructs <![CDATA[<b>hello</b>]]> 1354 * -------------- 1355 */ 1356 this(string content) @safe pure 1357 { 1358 import std..string : indexOf; 1359 if (content.indexOf("]]>") != -1) throw new CDataException(content); 1360 this.content = content; 1361 } 1362 1363 /** 1364 * Compares two CDatas for equality 1365 * 1366 * Example: 1367 * -------------- 1368 * CData item1,item2; 1369 * if (item1 == item2) { } 1370 * -------------- 1371 */ 1372 override bool opEquals(scope const Object o) const 1373 { 1374 const item = toType!(const Item)(o); 1375 const t = cast(const CData) item; 1376 return t !is null && content == t.content; 1377 } 1378 1379 /** 1380 * Compares two CDatas 1381 * 1382 * You should rarely need to call this function. It exists so that CDatas 1383 * can be used as associative array keys. 1384 * 1385 * Example: 1386 * -------------- 1387 * CData item1,item2; 1388 * if (item1 < item2) { } 1389 * -------------- 1390 */ 1391 override int opCmp(scope const Object o) scope const 1392 { 1393 const item = toType!(const Item)(o); 1394 const t = cast(const CData) item; 1395 return t !is null && (content != t.content 1396 ? (content < t.content ? -1 : 1 ) : 0 ); 1397 } 1398 1399 /** 1400 * Returns the hash of a CData 1401 * 1402 * You should rarely need to call this function. It exists so that CDatas 1403 * can be used as associative array keys. 1404 */ 1405 override size_t toHash() scope const nothrow { return hash(content); } 1406 1407 /** 1408 * Returns a string representation of this CData section 1409 */ 1410 override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; } 1411 1412 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always 1413 } 1414 1415 /** 1416 * Class representing a text (aka Parsed Character Data) section 1417 */ 1418 class Text : Item 1419 { 1420 private string content; 1421 1422 /** 1423 * Construct a text (aka PCData) section 1424 * 1425 * Params: 1426 * content = the text. This function encodes the text before 1427 * insertion, so it is safe to insert any text 1428 * 1429 * Example: 1430 * -------------- 1431 * auto Text = new CData("a < b"); 1432 * // constructs a < b 1433 * -------------- 1434 */ 1435 this(string content) @safe pure 1436 { 1437 this.content = encode(content); 1438 } 1439 1440 /** 1441 * Compares two text sections for equality 1442 * 1443 * Example: 1444 * -------------- 1445 * Text item1,item2; 1446 * if (item1 == item2) { } 1447 * -------------- 1448 */ 1449 override bool opEquals(scope const Object o) const 1450 { 1451 const item = toType!(const Item)(o); 1452 const t = cast(const Text) item; 1453 return t !is null && content == t.content; 1454 } 1455 1456 /** 1457 * Compares two text sections 1458 * 1459 * You should rarely need to call this function. It exists so that Texts 1460 * can be used as associative array keys. 1461 * 1462 * Example: 1463 * -------------- 1464 * Text item1,item2; 1465 * if (item1 < item2) { } 1466 * -------------- 1467 */ 1468 override int opCmp(scope const Object o) scope const 1469 { 1470 const item = toType!(const Item)(o); 1471 const t = cast(const Text) item; 1472 return t !is null 1473 && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); 1474 } 1475 1476 /** 1477 * Returns the hash of a text section 1478 * 1479 * You should rarely need to call this function. It exists so that Texts 1480 * can be used as associative array keys. 1481 */ 1482 override size_t toHash() scope const nothrow { return hash(content); } 1483 1484 /** 1485 * Returns a string representation of this Text section 1486 */ 1487 override string toString() scope const @safe @nogc pure nothrow { return content; } 1488 1489 /** 1490 * Returns true if the content is the empty string 1491 */ 1492 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; } 1493 } 1494 1495 /** 1496 * Class representing an XML Instruction section 1497 */ 1498 class XMLInstruction : Item 1499 { 1500 private string content; 1501 1502 /** 1503 * Construct an XML Instruction section 1504 * 1505 * Params: 1506 * content = the body of the instruction segment 1507 * 1508 * Throws: XIException if the segment body is illegal (contains ">") 1509 * 1510 * Example: 1511 * -------------- 1512 * auto item = new XMLInstruction("ATTLIST"); 1513 * // constructs <!ATTLIST> 1514 * -------------- 1515 */ 1516 this(string content) @safe pure 1517 { 1518 import std..string : indexOf; 1519 if (content.indexOf(">") != -1) throw new XIException(content); 1520 this.content = content; 1521 } 1522 1523 /** 1524 * Compares two XML instructions for equality 1525 * 1526 * Example: 1527 * -------------- 1528 * XMLInstruction item1,item2; 1529 * if (item1 == item2) { } 1530 * -------------- 1531 */ 1532 override bool opEquals(scope const Object o) const 1533 { 1534 const item = toType!(const Item)(o); 1535 const t = cast(const XMLInstruction) item; 1536 return t !is null && content == t.content; 1537 } 1538 1539 /** 1540 * Compares two XML instructions 1541 * 1542 * You should rarely need to call this function. It exists so that 1543 * XmlInstructions can be used as associative array keys. 1544 * 1545 * Example: 1546 * -------------- 1547 * XMLInstruction item1,item2; 1548 * if (item1 < item2) { } 1549 * -------------- 1550 */ 1551 override int opCmp(scope const Object o) scope const 1552 { 1553 const item = toType!(const Item)(o); 1554 const t = cast(const XMLInstruction) item; 1555 return t !is null 1556 && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); 1557 } 1558 1559 /** 1560 * Returns the hash of an XMLInstruction 1561 * 1562 * You should rarely need to call this function. It exists so that 1563 * XmlInstructions can be used as associative array keys. 1564 */ 1565 override size_t toHash() scope const nothrow { return hash(content); } 1566 1567 /** 1568 * Returns a string representation of this XmlInstruction 1569 */ 1570 override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; } 1571 1572 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always 1573 } 1574 1575 /** 1576 * Class representing a Processing Instruction section 1577 */ 1578 class ProcessingInstruction : Item 1579 { 1580 private string content; 1581 1582 /** 1583 * Construct a Processing Instruction section 1584 * 1585 * Params: 1586 * content = the body of the instruction segment 1587 * 1588 * Throws: PIException if the segment body is illegal (contains "?>") 1589 * 1590 * Example: 1591 * -------------- 1592 * auto item = new ProcessingInstruction("php"); 1593 * // constructs <?php?> 1594 * -------------- 1595 */ 1596 this(string content) @safe pure 1597 { 1598 import std..string : indexOf; 1599 if (content.indexOf("?>") != -1) throw new PIException(content); 1600 this.content = content; 1601 } 1602 1603 /** 1604 * Compares two processing instructions for equality 1605 * 1606 * Example: 1607 * -------------- 1608 * ProcessingInstruction item1,item2; 1609 * if (item1 == item2) { } 1610 * -------------- 1611 */ 1612 override bool opEquals(scope const Object o) const 1613 { 1614 const item = toType!(const Item)(o); 1615 const t = cast(const ProcessingInstruction) item; 1616 return t !is null && content == t.content; 1617 } 1618 1619 /** 1620 * Compares two processing instructions 1621 * 1622 * You should rarely need to call this function. It exists so that 1623 * ProcessingInstructions can be used as associative array keys. 1624 * 1625 * Example: 1626 * -------------- 1627 * ProcessingInstruction item1,item2; 1628 * if (item1 < item2) { } 1629 * -------------- 1630 */ 1631 override int opCmp(scope const Object o) scope const 1632 { 1633 const item = toType!(const Item)(o); 1634 const t = cast(const ProcessingInstruction) item; 1635 return t !is null 1636 && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); 1637 } 1638 1639 /** 1640 * Returns the hash of a ProcessingInstruction 1641 * 1642 * You should rarely need to call this function. It exists so that 1643 * ProcessingInstructions can be used as associative array keys. 1644 */ 1645 override size_t toHash() scope const nothrow { return hash(content); } 1646 1647 /** 1648 * Returns a string representation of this ProcessingInstruction 1649 */ 1650 override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; } 1651 1652 override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always 1653 } 1654 1655 /** 1656 * Abstract base class for XML items 1657 */ 1658 abstract class Item 1659 { 1660 /// Compares with another Item of same type for equality 1661 abstract override bool opEquals(scope const Object o) @safe const; 1662 1663 /// Compares with another Item of same type 1664 abstract override int opCmp(scope const Object o) @safe const; 1665 1666 /// Returns the hash of this item 1667 abstract override size_t toHash() @safe scope const; 1668 1669 /// Returns a string representation of this item 1670 abstract override string toString() @safe scope const; 1671 1672 /** 1673 * Returns an indented string representation of this item 1674 * 1675 * Params: 1676 * indent = number of spaces by which to indent child elements 1677 */ 1678 string[] pretty(uint indent) @safe scope const 1679 { 1680 import std..string : strip; 1681 string s = strip(toString()); 1682 return s.length == 0 ? [] : [ s ]; 1683 } 1684 1685 /// Returns true if the item represents empty XML text 1686 abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const; 1687 } 1688 1689 /** 1690 * Class for parsing an XML Document. 1691 * 1692 * This is a subclass of ElementParser. Most of the useful functions are 1693 * documented there. 1694 * 1695 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 1696 * 1697 * Bugs: 1698 * Currently only supports UTF documents. 1699 * 1700 * If there is an encoding attribute in the prolog, it is ignored. 1701 * 1702 */ 1703 class DocumentParser : ElementParser 1704 { 1705 string xmlText; 1706 1707 /** 1708 * Constructs a DocumentParser. 1709 * 1710 * The input to this function MUST be valid XML. 1711 * This is enforced by the function's in contract. 1712 * 1713 * Params: 1714 * xmlText_ = the entire XML document as text 1715 * 1716 */ 1717 this(string xmlText_) 1718 in 1719 { 1720 assert(xmlText_.length != 0); 1721 try 1722 { 1723 // Confirm that the input is valid XML 1724 check(xmlText_); 1725 } 1726 catch (CheckException e) 1727 { 1728 // And if it's not, tell the user why not 1729 assert(false, "\n" ~ e.toString()); 1730 } 1731 } 1732 body 1733 { 1734 xmlText = xmlText_; 1735 s = &xmlText; 1736 super(); // Initialize everything 1737 parse(); // Parse through the root tag (but not beyond) 1738 } 1739 } 1740 1741 @system unittest 1742 { 1743 auto doc = new Document("<root><child><grandchild/></child></root>"); 1744 assert(doc.elements.length == 1); 1745 assert(doc.elements[0].tag.name == "child"); 1746 assert(doc.items == doc.elements); 1747 } 1748 1749 /** 1750 * Class for parsing an XML element. 1751 * 1752 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 1753 * 1754 * Note that you cannot construct instances of this class directly. You can 1755 * construct a DocumentParser (which is a subclass of ElementParser), but 1756 * otherwise, Instances of ElementParser will be created for you by the 1757 * library, and passed your way via onStartTag handlers. 1758 * 1759 */ 1760 class ElementParser 1761 { 1762 alias Handler = void delegate(string); 1763 alias ElementHandler = void delegate(in Element element); 1764 alias ParserHandler = void delegate(ElementParser parser); 1765 1766 private 1767 { 1768 Tag tag_; 1769 string elementStart; 1770 string* s; 1771 1772 Handler commentHandler = null; 1773 Handler cdataHandler = null; 1774 Handler xiHandler = null; 1775 Handler piHandler = null; 1776 Handler rawTextHandler = null; 1777 Handler textHandler = null; 1778 1779 // Private constructor for start tags 1780 this(ElementParser parent) @safe @nogc pure nothrow 1781 { 1782 s = parent.s; 1783 this(); 1784 tag_ = parent.tag_; 1785 } 1786 1787 // Private constructor for empty tags 1788 this(Tag tag, string* t) @safe @nogc pure nothrow 1789 { 1790 s = t; 1791 this(); 1792 tag_ = tag; 1793 } 1794 } 1795 1796 /** 1797 * The Tag at the start of the element being parsed. You can read this to 1798 * determine the tag's name and attributes. 1799 */ 1800 @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; } 1801 1802 /** 1803 * Register a handler which will be called whenever a start tag is 1804 * encountered which matches the specified name. You can also pass null as 1805 * the name, in which case the handler will be called for any unmatched 1806 * start tag. 1807 * 1808 * Example: 1809 * -------------- 1810 * // Call this function whenever a <podcast> start tag is encountered 1811 * onStartTag["podcast"] = (ElementParser xml) 1812 * { 1813 * // Your code here 1814 * // 1815 * // This is a a closure, so code here may reference 1816 * // variables which are outside of this scope 1817 * }; 1818 * 1819 * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode> 1820 * // start tag is encountered 1821 * onStartTag["episode"] = &myEpisodeStartHandler; 1822 * 1823 * // call delegate dg for all other start tags 1824 * onStartTag[null] = dg; 1825 * -------------- 1826 * 1827 * This library will supply your function with a new instance of 1828 * ElementHandler, which may be used to parse inside the element whose 1829 * start tag was just found, or to identify the tag attributes of the 1830 * element, etc. 1831 * 1832 * Note that your function will be called for both start tags and empty 1833 * tags. That is, we make no distinction between <br></br> 1834 * and <br/>. 1835 */ 1836 ParserHandler[string] onStartTag; 1837 1838 /** 1839 * Register a handler which will be called whenever an end tag is 1840 * encountered which matches the specified name. You can also pass null as 1841 * the name, in which case the handler will be called for any unmatched 1842 * end tag. 1843 * 1844 * Example: 1845 * -------------- 1846 * // Call this function whenever a </podcast> end tag is encountered 1847 * onEndTag["podcast"] = (in Element e) 1848 * { 1849 * // Your code here 1850 * // 1851 * // This is a a closure, so code here may reference 1852 * // variables which are outside of this scope 1853 * }; 1854 * 1855 * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode> 1856 * // end tag is encountered 1857 * onEndTag["episode"] = &myEpisodeEndHandler; 1858 * 1859 * // call delegate dg for all other end tags 1860 * onEndTag[null] = dg; 1861 * -------------- 1862 * 1863 * Note that your function will be called for both start tags and empty 1864 * tags. That is, we make no distinction between <br></br> 1865 * and <br/>. 1866 */ 1867 ElementHandler[string] onEndTag; 1868 1869 protected this() @safe @nogc pure nothrow 1870 { 1871 elementStart = *s; 1872 } 1873 1874 /** 1875 * Register a handler which will be called whenever text is encountered. 1876 * 1877 * Example: 1878 * -------------- 1879 * // Call this function whenever text is encountered 1880 * onText = (string s) 1881 * { 1882 * // Your code here 1883 * 1884 * // The passed parameter s will have been decoded by the time you see 1885 * // it, and so may contain any character. 1886 * // 1887 * // This is a a closure, so code here may reference 1888 * // variables which are outside of this scope 1889 * }; 1890 * -------------- 1891 */ 1892 @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; } 1893 1894 /** 1895 * Register an alternative handler which will be called whenever text 1896 * is encountered. This differs from onText in that onText will decode 1897 * the text, whereas onTextRaw will not. This allows you to make design 1898 * choices, since onText will be more accurate, but slower, while 1899 * onTextRaw will be faster, but less accurate. Of course, you can 1900 * still call decode() within your handler, if you want, but you'd 1901 * probably want to use onTextRaw only in circumstances where you 1902 * know that decoding is unnecessary. 1903 * 1904 * Example: 1905 * -------------- 1906 * // Call this function whenever text is encountered 1907 * onText = (string s) 1908 * { 1909 * // Your code here 1910 * 1911 * // The passed parameter s will NOT have been decoded. 1912 * // 1913 * // This is a a closure, so code here may reference 1914 * // variables which are outside of this scope 1915 * }; 1916 * -------------- 1917 */ 1918 @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; } 1919 1920 /** 1921 * Register a handler which will be called whenever a character data 1922 * segment is encountered. 1923 * 1924 * Example: 1925 * -------------- 1926 * // Call this function whenever a CData section is encountered 1927 * onCData = (string s) 1928 * { 1929 * // Your code here 1930 * 1931 * // The passed parameter s does not include the opening <![CDATA[ 1932 * // nor closing ]]> 1933 * // 1934 * // This is a a closure, so code here may reference 1935 * // variables which are outside of this scope 1936 * }; 1937 * -------------- 1938 */ 1939 @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; } 1940 1941 /** 1942 * Register a handler which will be called whenever a comment is 1943 * encountered. 1944 * 1945 * Example: 1946 * -------------- 1947 * // Call this function whenever a comment is encountered 1948 * onComment = (string s) 1949 * { 1950 * // Your code here 1951 * 1952 * // The passed parameter s does not include the opening <!-- nor 1953 * // closing --> 1954 * // 1955 * // This is a a closure, so code here may reference 1956 * // variables which are outside of this scope 1957 * }; 1958 * -------------- 1959 */ 1960 @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; } 1961 1962 /** 1963 * Register a handler which will be called whenever a processing 1964 * instruction is encountered. 1965 * 1966 * Example: 1967 * -------------- 1968 * // Call this function whenever a processing instruction is encountered 1969 * onPI = (string s) 1970 * { 1971 * // Your code here 1972 * 1973 * // The passed parameter s does not include the opening <? nor 1974 * // closing ?> 1975 * // 1976 * // This is a a closure, so code here may reference 1977 * // variables which are outside of this scope 1978 * }; 1979 * -------------- 1980 */ 1981 @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; } 1982 1983 /** 1984 * Register a handler which will be called whenever an XML instruction is 1985 * encountered. 1986 * 1987 * Example: 1988 * -------------- 1989 * // Call this function whenever an XML instruction is encountered 1990 * // (Note: XML instructions may only occur preceding the root tag of a 1991 * // document). 1992 * onPI = (string s) 1993 * { 1994 * // Your code here 1995 * 1996 * // The passed parameter s does not include the opening <! nor 1997 * // closing > 1998 * // 1999 * // This is a a closure, so code here may reference 2000 * // variables which are outside of this scope 2001 * }; 2002 * -------------- 2003 */ 2004 @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; } 2005 2006 /** 2007 * Parse an XML element. 2008 * 2009 * Parsing will continue until the end of the current element. Any items 2010 * encountered for which a handler has been registered will invoke that 2011 * handler. 2012 * 2013 * Throws: various kinds of XMLException 2014 */ 2015 void parse() 2016 { 2017 import std.algorithm.searching : startsWith; 2018 import std..string : indexOf; 2019 2020 string t; 2021 const Tag root = tag_; 2022 Tag[string] startTags; 2023 if (tag_ !is null) startTags[tag_.name] = tag_; 2024 2025 while (s.length != 0) 2026 { 2027 if (startsWith(*s,"<!--")) 2028 { 2029 chop(*s,4); 2030 t = chop(*s,indexOf(*s,"-->")); 2031 if (commentHandler.funcptr !is null) commentHandler(t); 2032 chop(*s,3); 2033 } 2034 else if (startsWith(*s,"<![CDATA[")) 2035 { 2036 chop(*s,9); 2037 t = chop(*s,indexOf(*s,"]]>")); 2038 if (cdataHandler.funcptr !is null) cdataHandler(t); 2039 chop(*s,3); 2040 } 2041 else if (startsWith(*s,"<!")) 2042 { 2043 chop(*s,2); 2044 t = chop(*s,indexOf(*s,">")); 2045 if (xiHandler.funcptr !is null) xiHandler(t); 2046 chop(*s,1); 2047 } 2048 else if (startsWith(*s,"<?")) 2049 { 2050 chop(*s,2); 2051 t = chop(*s,indexOf(*s,"?>")); 2052 if (piHandler.funcptr !is null) piHandler(t); 2053 chop(*s,2); 2054 } 2055 else if (startsWith(*s,"<")) 2056 { 2057 tag_ = new Tag(*s,true); 2058 if (root is null) 2059 return; // Return to constructor of derived class 2060 2061 if (tag_.isStart) 2062 { 2063 startTags[tag_.name] = tag_; 2064 2065 auto parser = new ElementParser(this); 2066 2067 auto handler = tag_.name in onStartTag; 2068 if (handler !is null) (*handler)(parser); 2069 else 2070 { 2071 handler = null in onStartTag; 2072 if (handler !is null) (*handler)(parser); 2073 } 2074 } 2075 else if (tag_.isEnd) 2076 { 2077 const startTag = startTags[tag_.name]; 2078 string text; 2079 2080 if (startTag.tagString.length == 0) 2081 assert(0); 2082 2083 immutable(char)* p = startTag.tagString.ptr 2084 + startTag.tagString.length; 2085 immutable(char)* q = &tag_.tagString[0]; 2086 text = decode(p[0..(q-p)], DecodeMode.LOOSE); 2087 2088 auto element = new Element(startTag); 2089 if (text.length != 0) element ~= new Text(text); 2090 2091 auto handler = tag_.name in onEndTag; 2092 if (handler !is null) (*handler)(element); 2093 else 2094 { 2095 handler = null in onEndTag; 2096 if (handler !is null) (*handler)(element); 2097 } 2098 2099 if (tag_.name == root.name) return; 2100 } 2101 else if (tag_.isEmpty) 2102 { 2103 Tag startTag = new Tag(tag_.name); 2104 2105 // FIX by hed010gy, for bug 2979 2106 // http://d.puremagic.com/issues/show_bug.cgi?id=2979 2107 if (tag_.attr.length > 0) 2108 foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv; 2109 // END FIX 2110 2111 // Handle the pretend start tag 2112 string s2; 2113 auto parser = new ElementParser(startTag,&s2); 2114 auto handler1 = startTag.name in onStartTag; 2115 if (handler1 !is null) (*handler1)(parser); 2116 else 2117 { 2118 handler1 = null in onStartTag; 2119 if (handler1 !is null) (*handler1)(parser); 2120 } 2121 2122 // Handle the pretend end tag 2123 auto element = new Element(startTag); 2124 auto handler2 = tag_.name in onEndTag; 2125 if (handler2 !is null) (*handler2)(element); 2126 else 2127 { 2128 handler2 = null in onEndTag; 2129 if (handler2 !is null) (*handler2)(element); 2130 } 2131 } 2132 } 2133 else 2134 { 2135 t = chop(*s,indexOf(*s,"<")); 2136 if (rawTextHandler.funcptr !is null) 2137 rawTextHandler(t); 2138 else if (textHandler.funcptr !is null) 2139 textHandler(decode(t,DecodeMode.LOOSE)); 2140 } 2141 } 2142 } 2143 2144 /** 2145 * Returns that part of the element which has already been parsed 2146 */ 2147 override string toString() const @nogc @safe pure nothrow 2148 { 2149 assert(elementStart.length >= s.length); 2150 return elementStart[0 .. elementStart.length - s.length]; 2151 } 2152 2153 } 2154 2155 private 2156 { 2157 template Check(string msg) 2158 { 2159 string old = s; 2160 2161 void fail() @safe pure 2162 { 2163 s = old; 2164 throw new Err(s,msg); 2165 } 2166 2167 void fail(Err e) @safe pure 2168 { 2169 s = old; 2170 throw new Err(s,msg,e); 2171 } 2172 2173 void fail(string msg2) @safe pure 2174 { 2175 fail(new Err(s,msg2)); 2176 } 2177 } 2178 2179 void checkMisc(ref string s) @safe pure // rule 27 2180 { 2181 import std.algorithm.searching : startsWith; 2182 2183 mixin Check!("Misc"); 2184 2185 try 2186 { 2187 if (s.startsWith("<!--")) { checkComment(s); } 2188 else if (s.startsWith("<?")) { checkPI(s); } 2189 else { checkSpace(s); } 2190 } 2191 catch (Err e) { fail(e); } 2192 } 2193 2194 void checkDocument(ref string s) @safe pure // rule 1 2195 { 2196 mixin Check!("Document"); 2197 try 2198 { 2199 checkProlog(s); 2200 checkElement(s); 2201 star!(checkMisc)(s); 2202 } 2203 catch (Err e) { fail(e); } 2204 } 2205 2206 void checkChars(ref string s) @safe pure // rule 2 2207 { 2208 // TO DO - Fix std.utf stride and decode functions, then use those 2209 // instead 2210 import std.format : format; 2211 2212 mixin Check!("Chars"); 2213 2214 dchar c; 2215 int n = -1; 2216 foreach (i, dchar d; s) 2217 { 2218 if (!isChar(d)) 2219 { 2220 c = d; 2221 n = cast(int) i; 2222 break; 2223 } 2224 } 2225 if (n != -1) 2226 { 2227 s = s[n..$]; 2228 fail(format("invalid character: U+%04X",c)); 2229 } 2230 } 2231 2232 void checkSpace(ref string s) @safe pure // rule 3 2233 { 2234 import std.algorithm.searching : countUntil; 2235 import std.ascii : isWhite; 2236 import std.utf : byCodeUnit; 2237 2238 mixin Check!("Whitespace"); 2239 ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 2240 if (i == -1 && s.length > 0 && isWhite(s[0])) 2241 s = s[$ .. $]; 2242 else if (i > -1) 2243 s = s[i .. $]; 2244 if (s is old) fail(); 2245 } 2246 2247 void checkName(ref string s, out string name) @safe pure // rule 5 2248 { 2249 mixin Check!("Name"); 2250 2251 if (s.length == 0) fail(); 2252 int n; 2253 foreach (i, dchar c;s) 2254 { 2255 if (c == '_' || c == ':' || isLetter(c)) continue; 2256 if (i == 0) fail(); 2257 if (c == '-' || c == '.' || isDigit(c) 2258 || isCombiningChar(c) || isExtender(c)) continue; 2259 n = cast(int) i; 2260 break; 2261 } 2262 name = s[0 .. n]; 2263 s = s[n..$]; 2264 } 2265 2266 void checkAttValue(ref string s) @safe pure // rule 10 2267 { 2268 import std.algorithm.searching : countUntil; 2269 import std.utf : byCodeUnit; 2270 2271 mixin Check!("AttValue"); 2272 2273 if (s.length == 0) fail(); 2274 char c = s[0]; 2275 if (c != '\u0022' && c != '\u0027') 2276 fail("attribute value requires quotes"); 2277 s = s[1..$]; 2278 for (;;) 2279 { 2280 s = s[s.byCodeUnit.countUntil(c) .. $]; 2281 if (s.length == 0) fail("unterminated attribute value"); 2282 if (s[0] == '<') fail("< found in attribute value"); 2283 if (s[0] == c) break; 2284 try { checkReference(s); } catch (Err e) { fail(e); } 2285 } 2286 s = s[1..$]; 2287 } 2288 2289 void checkCharData(ref string s) @safe pure // rule 14 2290 { 2291 import std.algorithm.searching : startsWith; 2292 2293 mixin Check!("CharData"); 2294 2295 while (s.length != 0) 2296 { 2297 if (s.startsWith("&")) break; 2298 if (s.startsWith("<")) break; 2299 if (s.startsWith("]]>")) fail("]]> found within char data"); 2300 s = s[1..$]; 2301 } 2302 } 2303 2304 void checkComment(ref string s) @safe pure // rule 15 2305 { 2306 import std..string : indexOf; 2307 2308 mixin Check!("Comment"); 2309 2310 try { checkLiteral("<!--",s); } catch (Err e) { fail(e); } 2311 ptrdiff_t n = s.indexOf("--"); 2312 if (n == -1) fail("unterminated comment"); 2313 s = s[n..$]; 2314 try { checkLiteral("-->",s); } catch (Err e) { fail(e); } 2315 } 2316 2317 void checkPI(ref string s) @safe pure // rule 16 2318 { 2319 mixin Check!("PI"); 2320 2321 try 2322 { 2323 checkLiteral("<?",s); 2324 checkEnd("?>",s); 2325 } 2326 catch (Err e) { fail(e); } 2327 } 2328 2329 void checkCDSect(ref string s) @safe pure // rule 18 2330 { 2331 mixin Check!("CDSect"); 2332 2333 try 2334 { 2335 checkLiteral(cdata,s); 2336 checkEnd("]]>",s); 2337 } 2338 catch (Err e) { fail(e); } 2339 } 2340 2341 void checkProlog(ref string s) @safe pure // rule 22 2342 { 2343 mixin Check!("Prolog"); 2344 2345 try 2346 { 2347 /* The XML declaration is optional 2348 * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog 2349 */ 2350 opt!(checkXMLDecl)(s); 2351 2352 star!(checkMisc)(s); 2353 opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s); 2354 } 2355 catch (Err e) { fail(e); } 2356 } 2357 2358 void checkXMLDecl(ref string s) @safe pure // rule 23 2359 { 2360 mixin Check!("XMLDecl"); 2361 2362 try 2363 { 2364 checkLiteral("<?xml",s); 2365 checkVersionInfo(s); 2366 opt!(checkEncodingDecl)(s); 2367 opt!(checkSDDecl)(s); 2368 opt!(checkSpace)(s); 2369 checkLiteral("?>",s); 2370 } 2371 catch (Err e) { fail(e); } 2372 } 2373 2374 void checkVersionInfo(ref string s) @safe pure // rule 24 2375 { 2376 mixin Check!("VersionInfo"); 2377 2378 try 2379 { 2380 checkSpace(s); 2381 checkLiteral("version",s); 2382 checkEq(s); 2383 quoted!(checkVersionNum)(s); 2384 } 2385 catch (Err e) { fail(e); } 2386 } 2387 2388 void checkEq(ref string s) @safe pure // rule 25 2389 { 2390 mixin Check!("Eq"); 2391 2392 try 2393 { 2394 opt!(checkSpace)(s); 2395 checkLiteral("=",s); 2396 opt!(checkSpace)(s); 2397 } 2398 catch (Err e) { fail(e); } 2399 } 2400 2401 void checkVersionNum(ref string s) @safe pure // rule 26 2402 { 2403 import std.algorithm.searching : countUntil; 2404 import std.utf : byCodeUnit; 2405 2406 mixin Check!("VersionNum"); 2407 2408 s = s[s.byCodeUnit.countUntil('\"') .. $]; 2409 if (s is old) fail(); 2410 } 2411 2412 void checkDocTypeDecl(ref string s) @safe pure // rule 28 2413 { 2414 mixin Check!("DocTypeDecl"); 2415 2416 try 2417 { 2418 checkLiteral("<!DOCTYPE",s); 2419 // 2420 // TO DO -- ensure DOCTYPE is well formed 2421 // (But not yet. That's one of our "future directions") 2422 // 2423 checkEnd(">",s); 2424 } 2425 catch (Err e) { fail(e); } 2426 } 2427 2428 void checkSDDecl(ref string s) @safe pure // rule 32 2429 { 2430 import std.algorithm.searching : startsWith; 2431 2432 mixin Check!("SDDecl"); 2433 2434 try 2435 { 2436 checkSpace(s); 2437 checkLiteral("standalone",s); 2438 checkEq(s); 2439 } 2440 catch (Err e) { fail(e); } 2441 2442 int n = 0; 2443 if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5; 2444 else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4; 2445 else fail("standalone attribute value must be 'yes', \"yes\","~ 2446 " 'no' or \"no\""); 2447 s = s[n..$]; 2448 } 2449 2450 void checkElement(ref string s) @safe pure // rule 39 2451 { 2452 mixin Check!("Element"); 2453 2454 string sname,ename,t; 2455 try { checkTag(s,t,sname); } catch (Err e) { fail(e); } 2456 2457 if (t == "STag") 2458 { 2459 try 2460 { 2461 checkContent(s); 2462 t = s; 2463 checkETag(s,ename); 2464 } 2465 catch (Err e) { fail(e); } 2466 2467 if (sname != ename) 2468 { 2469 s = t; 2470 fail("end tag name \"" ~ ename 2471 ~ "\" differs from start tag name \""~sname~"\""); 2472 } 2473 } 2474 } 2475 2476 // rules 40 and 44 2477 void checkTag(ref string s, out string type, out string name) @safe pure 2478 { 2479 mixin Check!("Tag"); 2480 2481 try 2482 { 2483 type = "STag"; 2484 checkLiteral("<",s); 2485 checkName(s,name); 2486 star!(seq!(checkSpace,checkAttribute))(s); 2487 opt!(checkSpace)(s); 2488 if (s.length != 0 && s[0] == '/') 2489 { 2490 s = s[1..$]; 2491 type = "ETag"; 2492 } 2493 checkLiteral(">",s); 2494 } 2495 catch (Err e) { fail(e); } 2496 } 2497 2498 void checkAttribute(ref string s) @safe pure // rule 41 2499 { 2500 mixin Check!("Attribute"); 2501 2502 try 2503 { 2504 string name; 2505 checkName(s,name); 2506 checkEq(s); 2507 checkAttValue(s); 2508 } 2509 catch (Err e) { fail(e); } 2510 } 2511 2512 void checkETag(ref string s, out string name) @safe pure // rule 42 2513 { 2514 mixin Check!("ETag"); 2515 2516 try 2517 { 2518 checkLiteral("</",s); 2519 checkName(s,name); 2520 opt!(checkSpace)(s); 2521 checkLiteral(">",s); 2522 } 2523 catch (Err e) { fail(e); } 2524 } 2525 2526 void checkContent(ref string s) @safe pure // rule 43 2527 { 2528 import std.algorithm.searching : startsWith; 2529 2530 mixin Check!("Content"); 2531 2532 try 2533 { 2534 while (s.length != 0) 2535 { 2536 old = s; 2537 if (s.startsWith("&")) { checkReference(s); } 2538 else if (s.startsWith("<!--")) { checkComment(s); } 2539 else if (s.startsWith("<?")) { checkPI(s); } 2540 else if (s.startsWith(cdata)) { checkCDSect(s); } 2541 else if (s.startsWith("</")) { break; } 2542 else if (s.startsWith("<")) { checkElement(s); } 2543 else { checkCharData(s); } 2544 } 2545 } 2546 catch (Err e) { fail(e); } 2547 } 2548 2549 void checkCharRef(ref string s, out dchar c) @safe pure // rule 66 2550 { 2551 import std.format : format; 2552 2553 mixin Check!("CharRef"); 2554 2555 c = 0; 2556 try { checkLiteral("&#",s); } catch (Err e) { fail(e); } 2557 int radix = 10; 2558 if (s.length != 0 && s[0] == 'x') 2559 { 2560 s = s[1..$]; 2561 radix = 16; 2562 } 2563 if (s.length == 0) fail("unterminated character reference"); 2564 if (s[0] == ';') 2565 fail("character reference must have at least one digit"); 2566 while (s.length != 0) 2567 { 2568 immutable char d = s[0]; 2569 int n = 0; 2570 switch (d) 2571 { 2572 case 'F','f': ++n; goto case; 2573 case 'E','e': ++n; goto case; 2574 case 'D','d': ++n; goto case; 2575 case 'C','c': ++n; goto case; 2576 case 'B','b': ++n; goto case; 2577 case 'A','a': ++n; goto case; 2578 case '9': ++n; goto case; 2579 case '8': ++n; goto case; 2580 case '7': ++n; goto case; 2581 case '6': ++n; goto case; 2582 case '5': ++n; goto case; 2583 case '4': ++n; goto case; 2584 case '3': ++n; goto case; 2585 case '2': ++n; goto case; 2586 case '1': ++n; goto case; 2587 case '0': break; 2588 default: n = 100; break; 2589 } 2590 if (n >= radix) break; 2591 c *= radix; 2592 c += n; 2593 s = s[1..$]; 2594 } 2595 if (!isChar(c)) fail(format("U+%04X is not a legal character",c)); 2596 if (s.length == 0 || s[0] != ';') fail("expected ;"); 2597 else s = s[1..$]; 2598 } 2599 2600 void checkReference(ref string s) @safe pure // rule 67 2601 { 2602 import std.algorithm.searching : startsWith; 2603 2604 mixin Check!("Reference"); 2605 2606 try 2607 { 2608 dchar c; 2609 if (s.startsWith("&#")) checkCharRef(s,c); 2610 else checkEntityRef(s); 2611 } 2612 catch (Err e) { fail(e); } 2613 } 2614 2615 void checkEntityRef(ref string s) @safe pure // rule 68 2616 { 2617 mixin Check!("EntityRef"); 2618 2619 try 2620 { 2621 string name; 2622 checkLiteral("&",s); 2623 checkName(s,name); 2624 checkLiteral(";",s); 2625 } 2626 catch (Err e) { fail(e); } 2627 } 2628 2629 void checkEncName(ref string s) @safe pure // rule 81 2630 { 2631 import std.algorithm.searching : countUntil; 2632 import std.ascii : isAlpha; 2633 import std.utf : byCodeUnit; 2634 2635 mixin Check!("EncName"); 2636 2637 s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $]; 2638 if (s is old) fail(); 2639 s = s[s.byCodeUnit.countUntil('\"', '\'') .. $]; 2640 } 2641 2642 void checkEncodingDecl(ref string s) @safe pure // rule 80 2643 { 2644 mixin Check!("EncodingDecl"); 2645 2646 try 2647 { 2648 checkSpace(s); 2649 checkLiteral("encoding",s); 2650 checkEq(s); 2651 quoted!(checkEncName)(s); 2652 } 2653 catch (Err e) { fail(e); } 2654 } 2655 2656 // Helper functions 2657 2658 void checkLiteral(string literal,ref string s) @safe pure 2659 { 2660 import std..string : startsWith; 2661 2662 mixin Check!("Literal"); 2663 2664 if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\""); 2665 s = s[literal.length..$]; 2666 } 2667 2668 void checkEnd(string end,ref string s) @safe pure 2669 { 2670 import std..string : indexOf; 2671 // Deliberately no mixin Check here. 2672 2673 auto n = s.indexOf(end); 2674 if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\""); 2675 s = s[n..$]; 2676 checkLiteral(end,s); 2677 } 2678 2679 // Metafunctions -- none of these use mixin Check 2680 2681 void opt(alias f)(ref string s) 2682 { 2683 try { f(s); } catch (Err e) {} 2684 } 2685 2686 void plus(alias f)(ref string s) 2687 { 2688 f(s); 2689 star!(f)(s); 2690 } 2691 2692 void star(alias f)(ref string s) 2693 { 2694 while (s.length != 0) 2695 { 2696 try { f(s); } 2697 catch (Err e) { return; } 2698 } 2699 } 2700 2701 void quoted(alias f)(ref string s) 2702 { 2703 import std..string : startsWith; 2704 2705 if (s.startsWith("'")) 2706 { 2707 checkLiteral("'",s); 2708 f(s); 2709 checkLiteral("'",s); 2710 } 2711 else 2712 { 2713 checkLiteral("\"",s); 2714 f(s); 2715 checkLiteral("\"",s); 2716 } 2717 } 2718 2719 void seq(alias f,alias g)(ref string s) 2720 { 2721 f(s); 2722 g(s); 2723 } 2724 } 2725 2726 /** 2727 * Check an entire XML document for well-formedness 2728 * 2729 * Params: 2730 * s = the document to be checked, passed as a string 2731 * 2732 * Throws: CheckException if the document is not well formed 2733 * 2734 * CheckException's toString() method will yield the complete hierarchy of 2735 * parse failure (the XML equivalent of a stack trace), giving the line and 2736 * column number of every failure at every level. 2737 */ 2738 void check(string s) @safe pure 2739 { 2740 try 2741 { 2742 checkChars(s); 2743 checkDocument(s); 2744 if (s.length != 0) throw new Err(s,"Junk found after document"); 2745 } 2746 catch (Err e) 2747 { 2748 e.complete(s); 2749 throw e; 2750 } 2751 } 2752 2753 @system pure unittest 2754 { 2755 import std..string : indexOf; 2756 2757 try 2758 { 2759 check(q"[<?xml version="1.0"?> 2760 <catalog> 2761 <book id="bk101"> 2762 <author>Gambardella, Matthew</author> 2763 <title>XML Developer's Guide</title> 2764 <genre>Computer</genre> 2765 <price>44.95</price> 2766 <publish_date>2000-10-01</publish_date> 2767 <description>An in-depth look at creating applications 2768 with XML.</description> 2769 </book> 2770 <book id="bk102"> 2771 <author>Ralls, Kim</author> 2772 <title>Midnight Rain</title> 2773 <genre>Fantasy</genres> 2774 <price>5.95</price> 2775 <publish_date>2000-12-16</publish_date> 2776 <description>A former architect battles corporate zombies, 2777 an evil sorceress, and her own childhood to become queen 2778 of the world.</description> 2779 </book> 2780 <book id="bk103"> 2781 <author>Corets, Eva</author> 2782 <title>Maeve Ascendant</title> 2783 <genre>Fantasy</genre> 2784 <price>5.95</price> 2785 <publish_date>2000-11-17</publish_date> 2786 <description>After the collapse of a nanotechnology 2787 society in England, the young survivors lay the 2788 foundation for a new society.</description> 2789 </book> 2790 </catalog> 2791 ]"); 2792 assert(false); 2793 } 2794 catch (CheckException e) 2795 { 2796 auto n = e.toString().indexOf("end tag name \"genres\" differs"~ 2797 " from start tag name \"genre\""); 2798 assert(n != -1); 2799 } 2800 } 2801 2802 @system unittest 2803 { 2804 string s = q"EOS 2805 <?xml version="1.0"?> 2806 <set> 2807 <one>A</one> 2808 <!-- comment --> 2809 <two>B</two> 2810 </set> 2811 EOS"; 2812 try 2813 { 2814 check(s); 2815 } 2816 catch (CheckException e) 2817 { 2818 assert(0, e.toString()); 2819 } 2820 } 2821 2822 @system unittest 2823 { 2824 string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream 2825 xmlns:stream="http://etherx.'jabber'.org/streams" 2826 xmlns="jabber:'client'" from='jid.pl' id="587a5767" 2827 xml:lang="en" version="1.0" attr='a"b"c'> 2828 </stream:stream></r>`; 2829 2830 DocumentParser parser = new DocumentParser(test_xml); 2831 bool tested = false; 2832 parser.onStartTag["stream:stream"] = (ElementParser p) { 2833 assert(p.tag.attr["xmlns"] == "jabber:'client'"); 2834 assert(p.tag.attr["from"] == "jid.pl"); 2835 assert(p.tag.attr["attr"] == "a\"b\"c"); 2836 tested = true; 2837 }; 2838 parser.parse(); 2839 assert(tested); 2840 } 2841 2842 @system unittest 2843 { 2844 string s = q"EOS 2845 <?xml version="1.0" encoding="utf-8"?> <Tests> 2846 <Test thing="What & Up">What & Up Second</Test> 2847 </Tests> 2848 EOS"; 2849 auto xml = new DocumentParser(s); 2850 2851 xml.onStartTag["Test"] = (ElementParser xml) { 2852 assert(xml.tag.attr["thing"] == "What & Up"); 2853 }; 2854 2855 xml.onEndTag["Test"] = (in Element e) { 2856 assert(e.text() == "What & Up Second"); 2857 }; 2858 xml.parse(); 2859 } 2860 2861 @system unittest 2862 { 2863 string s = `<tag attr=""value>" />`; 2864 auto doc = new Document(s); 2865 assert(doc.toString() == s); 2866 } 2867 2868 /** The base class for exceptions thrown by this module */ 2869 class XMLException : Exception { this(string msg) @safe pure { super(msg); } } 2870 2871 // Other exceptions 2872 2873 /// Thrown during Comment constructor 2874 class CommentException : XMLException 2875 { private this(string msg) @safe pure { super(msg); } } 2876 2877 /// Thrown during CData constructor 2878 class CDataException : XMLException 2879 { private this(string msg) @safe pure { super(msg); } } 2880 2881 /// Thrown during XMLInstruction constructor 2882 class XIException : XMLException 2883 { private this(string msg) @safe pure { super(msg); } } 2884 2885 /// Thrown during ProcessingInstruction constructor 2886 class PIException : XMLException 2887 { private this(string msg) @safe pure { super(msg); } } 2888 2889 /// Thrown during Text constructor 2890 class TextException : XMLException 2891 { private this(string msg) @safe pure { super(msg); } } 2892 2893 /// Thrown during decode() 2894 class DecodeException : XMLException 2895 { private this(string msg) @safe pure { super(msg); } } 2896 2897 /// Thrown if comparing with wrong type 2898 class InvalidTypeException : XMLException 2899 { private this(string msg) @safe pure { super(msg); } } 2900 2901 /// Thrown when parsing for Tags 2902 class TagException : XMLException 2903 { private this(string msg) @safe pure { super(msg); } } 2904 2905 /** 2906 * Thrown during check() 2907 */ 2908 class CheckException : XMLException 2909 { 2910 CheckException err; /// Parent in hierarchy 2911 private string tail; 2912 /** 2913 * Name of production rule which failed to parse, 2914 * or specific error message 2915 */ 2916 string msg; 2917 size_t line = 0; /// Line number at which parse failure occurred 2918 size_t column = 0; /// Column number at which parse failure occurred 2919 2920 private this(string tail,string msg,Err err=null) @safe pure 2921 { 2922 super(null); 2923 this.tail = tail; 2924 this.msg = msg; 2925 this.err = err; 2926 } 2927 2928 private void complete(string entire) @safe pure 2929 { 2930 import std..string : count, lastIndexOf; 2931 import std.utf : toUTF32; 2932 2933 string head = entire[0..$-tail.length]; 2934 ptrdiff_t n = head.lastIndexOf('\n') + 1; 2935 line = head.count("\n") + 1; 2936 dstring t = toUTF32(head[n..$]); 2937 column = t.length + 1; 2938 if (err !is null) err.complete(entire); 2939 } 2940 2941 override string toString() const @safe pure 2942 { 2943 import std.format : format; 2944 2945 string s; 2946 if (line != 0) s = format("Line %d, column %d: ",line,column); 2947 s ~= msg; 2948 s ~= '\n'; 2949 if (err !is null) s = err.toString() ~ s; 2950 return s; 2951 } 2952 } 2953 2954 private alias Err = CheckException; 2955 2956 // Private helper functions 2957 2958 private 2959 { 2960 inout(T) toType(T)(inout Object o) 2961 { 2962 T t = cast(T)(o); 2963 if (t is null) 2964 { 2965 throw new InvalidTypeException("Attempt to compare a " 2966 ~ T.stringof ~ " with an instance of another type"); 2967 } 2968 return t; 2969 } 2970 2971 string chop(ref string s, size_t n) @safe pure nothrow 2972 { 2973 if (n == -1) n = s.length; 2974 string t = s[0 .. n]; 2975 s = s[n..$]; 2976 return t; 2977 } 2978 2979 bool optc(ref string s, char c) @safe pure nothrow 2980 { 2981 immutable bool b = s.length != 0 && s[0] == c; 2982 if (b) s = s[1..$]; 2983 return b; 2984 } 2985 2986 void reqc(ref string s, char c) @safe pure 2987 { 2988 if (s.length == 0 || s[0] != c) throw new TagException(""); 2989 s = s[1..$]; 2990 } 2991 2992 char requireOneOf(ref string s, string chars) @safe pure 2993 { 2994 import std..string : indexOf; 2995 2996 if (s.length == 0 || indexOf(chars,s[0]) == -1) 2997 throw new TagException(""); 2998 immutable char ch = s[0]; 2999 s = s[1..$]; 3000 return ch; 3001 } 3002 3003 size_t hash(string s,size_t h=0) @trusted nothrow 3004 { 3005 return typeid(s).getHash(&s) + h; 3006 } 3007 3008 // Definitions from the XML specification 3009 immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD, 3010 0x10000,0x10FFFF]; 3011 immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8, 3012 0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A, 3013 0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250, 3014 0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E, 3015 0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE, 3016 0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451, 3017 0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0, 3018 0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561, 3019 0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671, 3020 0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5, 3021 0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F, 3022 0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC, 3023 0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13, 3024 0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59, 3025 0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F, 3026 0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD, 3027 0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A, 3028 0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F, 3029 0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C, 3030 0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7, 3031 0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35, 3032 0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA, 3033 0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E, 3034 0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30, 3035 0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87, 3036 0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1, 3037 0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0, 3038 0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49, 3039 0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105, 3040 0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E, 3041 0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154, 3042 0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167, 3043 0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E, 3044 0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA, 3045 0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00, 3046 0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48, 3047 0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F, 3048 0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6, 3049 0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6, 3050 0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041, 3051 0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3]; 3052 immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5]; 3053 immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486, 3054 0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2, 3055 0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF, 3056 0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C, 3057 0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983, 3058 0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8, 3059 0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C, 3060 0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D, 3061 0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9, 3062 0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48, 3063 0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8, 3064 0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48, 3065 0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8, 3066 0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48, 3067 0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E, 3068 0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19, 3069 0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F, 3070 0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD, 3071 0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F, 3072 0x3099,0x3099,0x309A,0x309A]; 3073 immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966, 3074 0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7, 3075 0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0, 3076 0x0ED9,0x0F20,0x0F29]; 3077 immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387, 3078 0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031, 3079 0x3035,0x309D,0x309E,0x30FC,0x30FE]; 3080 3081 bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure 3082 { 3083 while (table.length != 0) 3084 { 3085 auto m = (table.length >> 1) & ~1; 3086 if (c < table[m]) 3087 { 3088 table = table[0 .. m]; 3089 } 3090 else if (c > table[m+1]) 3091 { 3092 table = table[m+2..$]; 3093 } 3094 else return true; 3095 } 3096 return false; 3097 } 3098 3099 string startOf(string s) @safe nothrow pure 3100 { 3101 string r; 3102 foreach (char c;s) 3103 { 3104 r ~= (c < 0x20 || c > 0x7F) ? '.' : c; 3105 if (r.length >= 40) { r ~= "___"; break; } 3106 } 3107 return r; 3108 } 3109 3110 void exit(string s=null) 3111 { 3112 throw new XMLException(s); 3113 } 3114 } 3115 3116 final class Attribute : Element 3117 { 3118 private alias string tstring; 3119 private tstring name_; 3120 private tstring value_; 3121 3122 this (tstring name, tstring value, Element parent) 3123 { 3124 super(name); 3125 name_ = name; 3126 value_ = value; 3127 this.parent = parent; 3128 } 3129 3130 override tstring name () 3131 { 3132 return name_; 3133 } 3134 3135 override tstring value () 3136 { 3137 return value_; 3138 } 3139 } 3140 3141 Element[] children(Element self) 3142 { 3143 import std.algorithm : map; 3144 import std.array : array; 3145 3146 return self.elements.map!(e => cast(Element) e).array; 3147 } 3148 3149 Attribute[] attributes (Element self) 3150 { 3151 auto attrs = new Attribute[self.tag.attr.length]; 3152 attrs = attrs[0 .. 0]; 3153 3154 foreach (k, v ; self.tag.attr) 3155 attrs ~= new Attribute(k, v, self); 3156 3157 return attrs; 3158 } 3159 3160 Element query (Element self) 3161 { 3162 return self; 3163 } 3164 3165 Element attribute (Element self, string prefix, string name, string value = null) 3166 { 3167 self.tag.attr[name] = value; 3168 3169 return self; 3170 }