orange.xml.PhobosXml source code

1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will remain until we have a suitable replacement,
6       but be aware that it will not remain long term.)
7 
8 Classes and functions for creating and parsing XML
9 
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
17 
18 Example: This example creates a DOM (Document Object Model) tree
19     from an XML file.
20 ------------------------------------------------------------------------------
21 import std.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
25 
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30 
31 void main()
32 {
33     string s = cast(string) std.file.read("books.xml");
34 
35     // Check for well-formedness
36     check(s);
37 
38     // Make a DOM tree
39     auto doc = new Document(s);
40 
41     // Plain-print it
42     writeln(doc);
43 }
44 ------------------------------------------------------------------------------
45 
46 Example: This example does much the same thing, except that the file is
47     deconstructed and reconstructed by hand. This is more work, but the
48     techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
50 import std.xml;
51 import std.stdio;
52 import std.string;
53 
54 struct Book
55 {
56     string id;
57     string author;
58     string title;
59     string genre;
60     string price;
61     string pubDate;
62     string description;
63 }
64 
65 void main()
66 {
67     string s = cast(string) std.file.read("books.xml");
68 
69     // Check for well-formedness
70     check(s);
71 
72     // Take it apart
73     Book[] books;
74 
75     auto xml = new DocumentParser(s);
76     xml.onStartTag["book"] = (ElementParser xml)
77     {
78         Book book;
79         book.id = xml.tag.attr["id"];
80 
81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
87 
88         xml.parse();
89 
90         books ~= book;
91     };
92     xml.parse();
93 
94     // Put it back together again;
95     auto doc = new Document(new Tag("catalog"));
96     foreach (book;books)
97     {
98         auto element = new Element("book");
99         element.tag.attr["id"] = book.id;
100 
101         element ~= new Element("author",      book.author);
102         element ~= new Element("title",       book.title);
103         element ~= new Element("genre",       book.genre);
104         element ~= new Element("price",       book.price);
105         element ~= new Element("publish-date",book.pubDate);
106         element ~= new Element("description", book.description);
107 
108         doc ~= element;
109     }
110 
111     // Pretty-print it
112     writefln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors:   Janice Caron
118 Source:    $(PHOBOSSRC std/_xml.d)
119 */
120 /*
121          Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123    (See accompanying file LICENSE_1_0.txt or copy at
124          http://www.boost.org/LICENSE_1_0.txt)
125 */
126 module orange.xml.PhobosXml;
127 
/// Literal text that opens an XML CDATA section.
enum string cdata = "<![CDATA[";
129 
/**
 * Tests whether a code point is a legal character under the XML standard.
 *
 * Accepted are tab, line feed, carriage return, U+0020..U+D7FF,
 * U+E000..U+FFFD and U+10000..U+10FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a legal XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Everything up to the start of the surrogate range: printable code
    // points are fine, of the control characters only TAB, LF and CR are.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // Surrogates (U+D800..U+DFFF) and values beyond U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Masking off the lowest bit maps both U+FFFE and U+FFFF to 0xFFFE,
    // the only two code points excluded from this upper range.
    return (c & 0x1FFFFE) != 0xFFFE;
}
161 
@safe @nogc nothrow pure unittest
{
    // Boundary values on either side of every range isChar accepts.
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (cp; accepted)
        assert( isChar(cast(dchar) cp));
    foreach (cp; rejected)
        assert(!isChar(cast(dchar) cp));

    // Optional exhaustive comparison of the hard-coded checks with the table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
191 
/**
 * Tests whether a character counts as whitespace under the XML standard.
 *
 * XML recognises exactly four whitespace characters: space, tab,
 * line feed and carriage return.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case '\u0020', '\u0009', '\u000A', '\u000D':
        return true;
    default:
        return false;
    }
}
207 
/**
 * Tests whether a character is a digit under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is '0'..'9' or is found in DigitTable
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // ASCII digits are answered directly; the table is consulted only for
    // everything else.
    immutable bool asciiDigit = c >= 0x0030 && c <= 0x0039;
    return asciiDigit || lookup(DigitTable, c);
}
223 
@safe @nogc nothrow pure unittest
{
    // Optional exhaustive comparison of isDigit with the digit table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (uint cp; 0 .. dchar.max + 1)
        {
            immutable dchar candidate = cast(dchar) cp;
            assert(isDigit(candidate) == lookup(DigitTable, candidate));
        }
    }
}
232 
/**
 * Tests whether a character is a letter under the XML standard, i.e.
 * either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
245 
/**
 * Tests whether a character is ideographic under the XML standard.
 *
 * The accepted code points are U+3007, U+3021..U+3029 and U+4E00..U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ideographic ranges
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    immutable bool isSingleton  = c == 0x3007;
    immutable bool inSmallRange = c >= 0x3021 && c <= 0x3029;
    immutable bool inLargeRange = c >= 0x4E00 && c <= 0x9FA5;

    return isSingleton || inSmallRange || inLargeRange;
}
265 
@safe @nogc nothrow pure unittest
{
    // End points of each accepted range, plus the single code point U+3007.
    static immutable dchar[] endpoints =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];

    foreach (dchar cp; endpoints)
        assert(isIdeographic(cp));

    // Optional exhaustive comparison of the hard-coded checks with the table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
280 
/**
 * Tests whether a character is a base character under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    const bool listed = lookup(BaseCharTable, c);
    return listed;
}
294 
/**
 * Tests whether a character is a combining character under the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    const bool listed = lookup(CombiningCharTable, c);
    return listed;
}
308 
/**
 * Tests whether a character is an extender under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    const bool listed = lookup(ExtenderTable, c);
    return listed;
}
321 
/**
 * Escapes a string for use in XML by substituting the predefined entities
 * for the five characters that require it.
 *
 * encode() replaces ampersand, quote, apostrophe, less-than and
 * greater-than with their entities; decode() performs the reverse. Both are
 * convenience functions only: the std.xml classes already encode and decode
 * automatically.
 *
 * When nothing needs escaping, the original string is returned unchanged
 * (no copy is made).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedUpTo; // index just past the most recently escaped character

    foreach (pos, ch; s)
    {
        string entity;
        switch (ch)
        {
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        default:   continue; // ordinary character: copied later in bulk
        }

        // Flush the untouched run before this character, then the entity.
        output.put(s[copiedUpTo .. pos]);
        output.put(entity);
        copiedUpTo = pos + 1;
    }

    // Nothing was ever written, so no character needed escaping.
    if (output.data.length == 0) return s;

    output.put(s[copiedUpTo .. $]);
    return output.data;
}
375 
@safe pure unittest
{
    // A string with nothing to escape must come back as the same slice.
    auto s = "hello";
    assert(encode(s) is s);

    // Each row: input, expected encoding.
    string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];

    foreach (row; cases)
        assert(encode(row[0]) == row[1], encode(row[0]));
}
386 
/**
 * Mode to use for decoding.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE,   // leave the input untouched
    LOOSE,  // decode what can be decoded, keep the rest verbatim
    STRICT  // decode, and report malformed input with an exception
}
398 
/**
 * Unescapes the predefined XML entities in a string.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe,
 * less-than and greater-than) and decode() reverses that. Both are
 * convenience functions only: the std.xml classes already encode and decode
 * automatically.
 *
 * Recognised are the entities &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt;
 * and &amp;gt;, as well as decimal and hexadecimal character references
 * such as &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original is returned.
 *
 * The "mode" parameter is one of DecodeMode.NONE (do not decode),
 * DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is met, so ampersand-free input is
    // returned without copying.
    string buffer;

    for (size_t i = 0; i < s.length; ++i)
    {
        immutable char c = s[i];

        if (c != '&')
        {
            // Only copy once the output has started to differ from s.
            if (buffer.length != 0) buffer ~= c;
            continue;
        }

        // First ampersand: seed the output with everything before it.
        if (buffer.length == 0)
        {
            buffer = s[0 .. i].dup;
        }

        string rest = s[i .. $];

        if (startsWith(rest, "&#"))
        {
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);
                char[4] temp;
                import std.utf : encode;
                buffer ~= temp[0 .. encode(temp, d)];
                // t now holds what follows the reference; position i on the
                // reference's last character (the loop's ++i steps past it).
                i = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
        // Named entities: emit the character, then skip the remainder of
        // the entity (its length minus the '&' already at index i).
        else if (startsWith(rest, "&amp;"))
        {
            buffer ~= '&';
            i += 4;
        }
        else if (startsWith(rest, "&quot;"))
        {
            buffer ~= '"';
            i += 5;
        }
        else if (startsWith(rest, "&apos;"))
        {
            buffer ~= '\'';
            i += 5;
        }
        else if (startsWith(rest, "&lt;"))
        {
            buffer ~= '<';
            i += 3;
        }
        else if (startsWith(rest, "&gt;"))
        {
            buffer ~= '>';
            i += 3;
        }
        else
        {
            // A bare ampersand: an error in STRICT mode, kept as-is in LOOSE.
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            buffer ~= '&';
        }
    }

    return (buffer.length == 0) ? s : buffer;
}
487 
@safe pure unittest
{
    // Asserts that strict decoding of s raises a DecodeException.
    void assertNot(string s) pure
    {
        bool rejected = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { rejected = true; }
        assert(rejected,s);
    }

    // Input without any ampersand is handed back as the same slice.
    auto s = "hello";
    assert(decode(s, DecodeMode.STRICT) is s);

    // Well-formed input: each row is input, expected strict decoding.
    string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (row; wellFormed)
        assert(decode(row[0], DecodeMode.STRICT) == row[1]);

    // Malformed input: LOOSE passes it through verbatim, STRICT throws.
    string[] malformed =
        ["cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"];
    foreach (text; malformed)
        assert(decode(text, DecodeMode.LOOSE) == text);
    foreach (text; malformed)
        assertNot(text);
}
523 
/**
 * Class representing an XML document.
 *
 * A Document is an Element (the root element) together with the text that
 * precedes it (the prolog) and the text that follows it (the epilog).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";
    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text. Must not be empty.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto xml = new DocumentParser(s);
        string tagString = xml.tag.tagString;

        this(xml.tag);
        // The root tag's text is used as a slice of s here, so the pointer
        // difference gives the length of whatever precedes the root element.
        prolog = s[0 .. tagString.ptr - s.ptr];
        parse(xml);
        // Whatever the parser has left unconsumed becomes the epilog.
        epilog = *xml.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    /**
     * Compares two Documents for equality
     *
     * Two documents are equal when their prologs, their element content
     * and their epilogs are all equal.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const doc = toType!(const Document)(o);
        return prolog == doc.prolog
            && (cast(const) this).Element.opEquals(cast(const) doc)
            && epilog == doc.epilog;
    }

    /**
     * Compares two Documents
     *
     * Ordering is decided by the prolog first, then by the element
     * content, and finally by the epilog.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const doc = toType!(const Document)(o);
        if (prolog != doc.prolog)
            return prolog < doc.prolog ? -1 : 1;
        if (int cmp = this.Element.opCmp(doc))
            return cmp;
        if (epilog != doc.epilog)
            return epilog < doc.epilog ? -1 : 1;
        return 0;
    }

    /**
     * Returns the hash of a Document
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override size_t toHash() scope const @trusted
    {
        return hash(prolog, hash(epilog, (cast() this).Element.toHash()));
    }

    /**
     * Returns the string representation of a Document. (That is, the
     * complete XML of a document).
     */
    override string toString() scope const @safe
    {
        return prolog ~ super.toString() ~ epilog;
    }
}
645 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    string source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text compare equal and hash alike.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] byDocument;
    byDocument[first] = 1;
    assert(byDocument[second] == 1);

    // Appending a child makes the second document order after the first.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
663 
/**
 * Class representing an XML element.
 *
 * An element owns a start tag and an ordered list of child items. Every
 * appended item is stored in $(B items) and additionally in the typed list
 * matching its kind (texts, cdatas, comments, pis or elements).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements
    Element parent; /// Parent element; not assigned by any code in this class

    /// Returns the name of this element's tag.
    string name ()
    {
        return tag.name;
    }

    /// Returns the decoded text interior of this element (see text()).
    string value ()
    {
        return text;
    }

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) this ~= new Text(interior);
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and tag string); the copy starts
     * out as an EMPTY tag and is switched to START by appendItem once a
     * non-empty item is added.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op : "~")(Text item) @safe pure
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op : "~")(CData item) @safe pure
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op : "~")(Comment item) @safe pure
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op : "~")(ProcessingInstruction item) @safe pure
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op : "~")(Element item) @safe pure
    {
        elements ~= item;
        appendItem(item);
    }

    // Records item in the ordered item list and promotes the tag from EMPTY
    // to START as soon as a non-empty item is added.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from a parser: installs handlers that append each
    // parsed item to this element, recursing for nested elements, and then
    // runs the parser.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { this ~= new Text(s); };
        xml.onCData = (string s) { this ~= new CData(s); };
        xml.onComment = (string s) { this ~= new Comment(s); };
        xml.onPI = (string s) { this ~= new ProcessingInstruction(s); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            this ~= e;
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements are equal when they hold the same number of items and
     * the items are pairwise equal. The tag is not part of the comparison.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared pairwise in order; the first unequal pair decides.
     * If one item list is a prefix of the other, the shorter one orders
     * first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        for (size_t i=0; ; ++i)
        {
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * The hash is the tag's hash plus the sum of the hashes of all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element
         *      contains an item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                Text t = cast(Text) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields its empty tag, an element holding a
         * single Text yields one line, and anything else yields the start
         * tag, every line of every child shifted right by indent, and the
         * end tag.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         *
         * Returns: the lines of the pretty-printed element
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Padding to count(s) + indent prepends indent spaces.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        /// Returns true if this element has no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
983 
/**
 * Tag types.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType
{
    START, // start tag
    END,   // end tag
    EMPTY  // empty tag
}
993 
994 /**
995  * Class representing an XML tag.
996  *
997  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
998  *
999  * The class invariant guarantees
1000  * <ul>
1001  * <li> that $(B type) is a valid enum TagType value</li>
1002  * <li> that $(B name) consists of valid characters</li>
1003  * <li> that each attribute name consists of valid characters</li>
1004  * </ul>
1005  */
1006 class Tag
1007 {
1008     TagType type = TagType.START;   /// Type of tag
1009     string name;                    /// Tag name
1010     string[string] attr;            /// Associative array of attributes
1011     private string tagString;       // source text of the tag; assigned only by the parsing constructor
1012 
1013     invariant()
1014     {
1015         string s;
1016         string t;
1017 
1018         assert(type == TagType.START
1019             || type == TagType.END
1020             || type == TagType.EMPTY);
1021 
1022         s = name;
1023         try { checkName(s,t); }
1024         catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }
1025 
1026         foreach (k,v;attr)
1027         {
1028             s = k;
1029             try { checkName(s,t); }
1030             catch (Err e)
1031                 { assert(false,"Invalid attribute name:" ~ e.toString()); }
1032         }
1033     }
1034 
1035     /**
1036      * Constructs an instance of Tag with a specified name and type
1037      *
1038      * The constructor does not initialize the attributes. To initialize the
1039      * attributes, you access the $(B attr) member variable.
1040      *
1041      * Params:
1042      *      name = the Tag's name
1043      *      type = (optional) the Tag's type. If omitted, defaults to
1044      *          TagType.START.
1045      *
1046      * Example:
1047      * --------------
1048      * auto tag = new Tag("img",TagType.EMPTY);
1049      * tag.attr["src"] = "http://example.com/example.jpg";
1050      * --------------
1051      */
1052     this(string name, TagType type=TagType.START) @safe pure
1053     {
1054         this.name = name;
1055         this.type = type;
1056     }
1057 
1058     /* Private constructor (so don't ddoc this!)
1059      *
1060      * Constructs a Tag by parsing the string representation, e.g. "<html>".
1061      *
1062      * The string is passed by reference, and is advanced over all characters
1063      * consumed.
1064      *
1065      * The second parameter is a dummy parameter only, required solely to
1066      * distinguish this constructor from the public one.
1067      */
1068     private this(ref string s, bool dummy) @safe pure
1069     {
1070         import std.algorithm.searching : countUntil;
1071         import std.ascii : isWhite;
1072         import std.utf : byCodeUnit;
1073         // NOTE(review): countUntil returns -1 when nothing matches; the slices below do not guard against that.
1074         tagString = s;
1075         try
1076         {
1077             reqc(s,'<');
1078             if (optc(s,'/')) type = TagType.END;
1079             ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
1080             name = s[0 .. i];
1081             s = s[i .. $];
1082             // Skip whitespace between the name and whatever follows it.
1083             i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1084             s = s[i .. $];
1085             // Each pass of the loop consumes one key="value" attribute.
1086             while (s.length > 0 && s[0] != '>' && s[0] != '/')
1087             {
1088                 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
1089                 string key = s[0 .. i];
1090                 s = s[i .. $];
1091 
1092                 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1093                 s = s[i .. $];
1094                 reqc(s,'=');
1095                 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1096                 s = s[i .. $];
1097                 // The value must be closed by the same quote character that opened it.
1098                 immutable char quote = requireOneOf(s,"'\"");
1099                 i = s.byCodeUnit.countUntil(quote);
1100                 string val = decode(s[0 .. i], DecodeMode.LOOSE);
1101                 s = s[i .. $];
1102                 reqc(s,quote);
1103 
1104                 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1105                 s = s[i .. $];
1106                 attr[key] = val;
1107             }
1108             if (optc(s,'/'))
1109             {
1110                 if (type == TagType.END) throw new TagException("");
1111                 type = TagType.EMPTY;
1112             }
1113             reqc(s,'>');
1114             tagString.length = tagString.length - s.length;
1115         }
1116         catch (XMLException e)
1117         {
1118             tagString.length = tagString.length - s.length;
1119             throw new TagException(tagString);
1120         }
1121     }
1122 
1123     const
1124     {
1125         /**
1126          * Compares two Tags for equality
1127          *
1128          * You should rarely need to call this function. It exists so that Tags
1129          * can be used as associative array keys.
1130          *
1131          * Example:
1132          * --------------
1133          * Tag tag1,tag2;
1134          * if (tag1 == tag2) { }
1135          * --------------
1136          */
1137         override bool opEquals(scope Object o)
1138         {
1139             const tag = toType!(const Tag)(o);
1140             return
1141                 (name != tag.name) ? false : (
1142                 (attr != tag.attr) ? false : (
1143                 (type != tag.type) ? false : (
1144             true )));
1145         }
1146 
1147         /**
1148          * Compares two Tags
1149          *
1150          * Example:
1151          * --------------
1152          * Tag tag1,tag2;
1153          * if (tag1 < tag2) { }
1154          * --------------
1155          */
1156         override int opCmp(Object o)
1157         {
1158             const tag = toType!(const Tag)(o);
1159             // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
1160             return
1161                 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
1162                 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
1163                 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
1164             0 )));
1165         }
1166 
1167         /**
1168          * Returns the hash of a Tag
1169          *
1170          * You should rarely need to call this function. It exists so that Tags
1171          * can be used as associative array keys.
1172          */
1173         override size_t toHash()
1174         {
1175             return typeid(name).getHash(&name);
1176         }
1177 
1178         /**
1179          * Returns the string representation of a Tag
1180          *
1181          * Example:
1182          * --------------
1183          * auto tag = new Tag("book",TagType.START);
1184          * writefln(tag.toString()); // writes "<book>"
1185          * --------------
1186          */
1187         override string toString() @safe
1188         {
1189             if (isEmpty) return toEmptyString();
1190             return (isEnd) ? toEndString() : toStartString();
1191         }
1192 
1193         private
1194         {
1195             string toNonEndString() @safe
1196             {
1197                 import std.format : format;
1198 
1199                 string s = "<" ~ name;
1200                 foreach (key,val;attr)
1201                     s ~= format(" %s=\"%s\"",key,encode(val));
1202                 return s;
1203             }
1204 
1205             string toStartString() @safe { return toNonEndString() ~ ">"; }
1206 
1207             string toEndString() @safe { return "</" ~ name ~ ">"; }
1208 
1209             string toEmptyString() @safe { return toNonEndString() ~ " />"; }
1210         }
1211 
1212         /**
1213          * Returns true if the Tag is a start tag
1214          *
1215          * Example:
1216          * --------------
1217          * if (tag.isStart) { }
1218          * --------------
1219          */
1220         @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }
1221 
1222         /**
1223          * Returns true if the Tag is an end tag
1224          *
1225          * Example:
1226          * --------------
1227          * if (tag.isEnd) { }
1228          * --------------
1229          */
1230         @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }
1231 
1232         /**
1233          * Returns true if the Tag is an empty tag
1234          *
1235          * Example:
1236          * --------------
1237          * if (tag.isEmpty) { }
1238          * --------------
1239          */
1240         @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
1241     }
1242 }
1243 
1244 /**
1245  * Class representing a comment
1246  */
1247 class Comment : Item
1248 {
1249     private string content;
1250 
1251     /**
1252      * Construct a comment
1253      *
1254      * Params:
1255      *      content = the body of the comment
1256      *
1257      * Throws: CommentException if the comment body is illegal (contains "--"
1258      * or exactly equals "-")
1259      *
1260      * Example:
1261      * --------------
1262      * auto item = new Comment("This is a comment");
1263      *    // constructs <!--This is a comment-->
1264      * --------------
1265      */
1266     this(string content) @safe pure
1267     {
1268         import std.string : indexOf;
1269 
1270         if (content == "-" || content.indexOf("--") != -1)
1271             throw new CommentException(content);
1272         this.content = content;
1273     }
1274 
1275     /**
1276      * Compares two comments for equality
1277      *
1278      * Example:
1279      * --------------
1280      * Comment item1,item2;
1281      * if (item1 == item2) { }
1282      * --------------
1283      */
1284     override bool opEquals(scope const Object o) const
1285     {
1286         const item = toType!(const Item)(o);
1287         const t = cast(const Comment) item;
1288         return t !is null && content == t.content;
1289     }
1290 
1291     /**
1292      * Compares two comments by content. Provided mainly so that Comments can
1293      * be used as associative array keys.
1294      *
1295      * Returns: -1, 0 or 1 as this content sorts before, equal to or after the
1296      * other's; 0 if the other item is not a Comment
1297      * Example:
1298      * --------------
1299      * Comment item1,item2;
1300      * if (item1 < item2) { }
1301      * --------------
1302      */
1303     override int opCmp(scope const Object o) scope const
1304     {
1305         const item = toType!(const Item)(o);
1306         const t = cast(const Comment) item;
1307         if (t is null || content == t.content) return 0; // different item kind, or identical text
1308         return content < t.content ? -1 : 1;              // keep the sign: "&&" would collapse it to a bool
1309     }
1310 
1311     /**
1312      * Returns the hash of a Comment
1313      *
1314      * You should rarely need to call this function. It exists so that Comments
1315      * can be used as associative array keys.
1316      */
1317     override size_t toHash() scope const nothrow { return hash(content); }
1318 
1319     /**
1320      * Returns a string representation of this comment
1321      */
1322     override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }
1323 
1324     override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
1325 }
1326 
1327 @safe unittest // issue 16241
1328 {
1329     import std.exception : assertThrown;
1330     assertThrown!CommentException(new Comment("--"));
1331     auto legal = new Comment("==");
1332     assert(legal.content == "==");
1333 }
1334 
1335 /**
1336  * Class representing a Character Data section
1337  */
1338 class CData : Item
1339 {
1340     private string content;
1341 
1342     /**
1343      * Construct a character data section
1344      *
1345      * Params:
1346      *      content = the body of the character data segment
1347      *
1348      * Throws: CDataException if the segment body is illegal (contains "]]>")
1349      *
1350      * Example:
1351      * --------------
1352      * auto item = new CData("<b>hello</b>");
1353      *    // constructs <![CDATA[<b>hello</b>]]>
1354      * --------------
1355      */
1356     this(string content) @safe pure
1357     {
1358         import std.string : indexOf;
1359         if (content.indexOf("]]>") != -1) throw new CDataException(content);
1360         this.content = content;
1361     }
1362 
1363     /**
1364      * Compares two CDatas for equality
1365      *
1366      * Example:
1367      * --------------
1368      * CData item1,item2;
1369      * if (item1 == item2) { }
1370      * --------------
1371      */
1372     override bool opEquals(scope const Object o) const
1373     {
1374         const item = toType!(const Item)(o);
1375         const t = cast(const CData) item;
1376         return t !is null && content == t.content;
1377     }
1378 
1379     /**
1380      * Compares two CDatas by content. Provided mainly so that CDatas can be
1381      * used as associative array keys.
1382      *
1383      * Returns: -1, 0 or 1 as this content sorts before, equal to or after the
1384      * other's; 0 if the other item is not a CData
1385      * Example:
1386      * --------------
1387      * CData item1,item2;
1388      * if (item1 < item2) { }
1389      * --------------
1390      */
1391     override int opCmp(scope const Object o) scope const
1392     {
1393         const item = toType!(const Item)(o);
1394         const t = cast(const CData) item;
1395         if (t is null || content == t.content) return 0; // different item kind, or identical text
1396         return content < t.content ? -1 : 1;              // keep the sign: "&&" would collapse it to a bool
1397     }
1398 
1399     /**
1400      * Returns the hash of a CData
1401      *
1402      * You should rarely need to call this function. It exists so that CDatas
1403      * can be used as associative array keys.
1404      */
1405     override size_t toHash() scope const nothrow { return hash(content); }
1406 
1407     /**
1408      * Returns a string representation of this CData section
1409      */
1410     override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }
1411 
1412     override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
1413 }
1414 
1415 /**
1416  * Class representing a text (aka Parsed Character Data) section
1417  */
1418 class Text : Item
1419 {
1420     private string content;
1421 
1422     /**
1423      * Construct a text (aka PCData) section
1424      *
1425      * Params:
1426      *      content = the text. This function encodes the text before
1427      *      insertion, so it is safe to insert any text
1428      *
1429      * Example:
1430      * --------------
1431      * auto item = new Text("a < b");
1432      *    // constructs a &lt; b
1433      * --------------
1434      */
1435     this(string content) @safe pure
1436     {
1437         this.content = encode(content);
1438     }
1439 
1440     /**
1441      * Compares two text sections for equality
1442      *
1443      * Example:
1444      * --------------
1445      * Text item1,item2;
1446      * if (item1 == item2) { }
1447      * --------------
1448      */
1449     override bool opEquals(scope const Object o) const
1450     {
1451         const item = toType!(const Item)(o);
1452         const t = cast(const Text) item;
1453         return t !is null && content == t.content;
1454     }
1455 
1456     /**
1457      * Compares two text sections by their (encoded) content. Provided mainly
1458      * so that Texts can be used as associative array keys.
1459      *
1460      * Returns: -1, 0 or 1 as this content sorts before, equal to or after the
1461      * other's; 0 if the other item is not a Text
1462      * Example:
1463      * --------------
1464      * Text item1,item2;
1465      * if (item1 < item2) { }
1466      * --------------
1467      */
1468     override int opCmp(scope const Object o) scope const
1469     {
1470         const item = toType!(const Item)(o);
1471         const t = cast(const Text) item;
1472         if (t is null || content == t.content) return 0; // different item kind, or identical text
1473         return content < t.content ? -1 : 1;              // keep the sign: "&&" would collapse it to a bool
1474     }
1475 
1476     /**
1477      * Returns the hash of a text section
1478      *
1479      * You should rarely need to call this function. It exists so that Texts
1480      * can be used as associative array keys.
1481      */
1482     override size_t toHash() scope const nothrow { return hash(content); }
1483 
1484     /**
1485      * Returns a string representation of this Text section
1486      */
1487     override string toString() scope const @safe @nogc pure nothrow { return content; }
1488 
1489     /**
1490      * Returns true if the content is the empty string
1491      */
1492     override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
1493 }
1494 
1495 /**
1496  * Class representing an XML Instruction section
1497  */
1498 class XMLInstruction : Item
1499 {
1500     private string content;
1501 
1502     /**
1503      * Construct an XML Instruction section
1504      *
1505      * Params:
1506      *      content = the body of the instruction segment
1507      *
1508      * Throws: XIException if the segment body is illegal (contains ">")
1509      *
1510      * Example:
1511      * --------------
1512      * auto item = new XMLInstruction("ATTLIST");
1513      *    // constructs <!ATTLIST>
1514      * --------------
1515      */
1516     this(string content) @safe pure
1517     {
1518         import std.string : indexOf;
1519         if (content.indexOf(">") != -1) throw new XIException(content);
1520         this.content = content;
1521     }
1522 
1523     /**
1524      * Compares two XML instructions for equality
1525      *
1526      * Example:
1527      * --------------
1528      * XMLInstruction item1,item2;
1529      * if (item1 == item2) { }
1530      * --------------
1531      */
1532     override bool opEquals(scope const Object o) const
1533     {
1534         const item = toType!(const Item)(o);
1535         const t = cast(const XMLInstruction) item;
1536         return t !is null && content == t.content;
1537     }
1538 
1539     /**
1540      * Compares two XML instructions by content. Provided mainly so that
1541      * XMLInstructions can be used as associative array keys.
1542      *
1543      * Returns: -1, 0 or 1 as this content sorts before, equal to or after the
1544      * other's; 0 if the other item is not an XMLInstruction
1545      * Example:
1546      * --------------
1547      * XMLInstruction item1,item2;
1548      * if (item1 < item2) { }
1549      * --------------
1550      */
1551     override int opCmp(scope const Object o) scope const
1552     {
1553         const item = toType!(const Item)(o);
1554         const t = cast(const XMLInstruction) item;
1555         if (t is null || content == t.content) return 0; // different item kind, or identical text
1556         return content < t.content ? -1 : 1;              // keep the sign: "&&" would collapse it to a bool
1557     }
1558 
1559     /**
1560      * Returns the hash of an XMLInstruction
1561      *
1562      * You should rarely need to call this function. It exists so that
1563      * XmlInstructions can be used as associative array keys.
1564      */
1565     override size_t toHash() scope const nothrow { return hash(content); }
1566 
1567     /**
1568      * Returns a string representation of this XmlInstruction
1569      */
1570     override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }
1571 
1572     override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
1573 }
1574 
1575 /**
1576  * Class representing a Processing Instruction section
1577  */
1578 class ProcessingInstruction : Item
1579 {
1580     private string content;
1581 
1582     /**
1583      * Construct a Processing Instruction section
1584      *
1585      * Params:
1586      *      content = the body of the instruction segment
1587      *
1588      * Throws: PIException if the segment body is illegal (contains "?>")
1589      *
1590      * Example:
1591      * --------------
1592      * auto item = new ProcessingInstruction("php");
1593      *    // constructs <?php?>
1594      * --------------
1595      */
1596     this(string content) @safe pure
1597     {
1598         import std.string : indexOf;
1599         if (content.indexOf("?>") != -1) throw new PIException(content);
1600         this.content = content;
1601     }
1602 
1603     /**
1604      * Compares two processing instructions for equality
1605      *
1606      * Example:
1607      * --------------
1608      * ProcessingInstruction item1,item2;
1609      * if (item1 == item2) { }
1610      * --------------
1611      */
1612     override bool opEquals(scope const Object o) const
1613     {
1614         const item = toType!(const Item)(o);
1615         const t = cast(const ProcessingInstruction) item;
1616         return t !is null && content == t.content;
1617     }
1618 
1619     /**
1620      * Compares two processing instructions by content. Provided mainly so that
1621      * ProcessingInstructions can be used as associative array keys.
1622      *
1623      * Returns: -1, 0 or 1 as this content sorts before, equal to or after the
1624      * other's; 0 if the other item is not a ProcessingInstruction
1625      * Example:
1626      * --------------
1627      * ProcessingInstruction item1,item2;
1628      * if (item1 < item2) { }
1629      * --------------
1630      */
1631     override int opCmp(scope const Object o) scope const
1632     {
1633         const item = toType!(const Item)(o);
1634         const t = cast(const ProcessingInstruction) item;
1635         if (t is null || content == t.content) return 0; // different item kind, or identical text
1636         return content < t.content ? -1 : 1;              // keep the sign: "&&" would collapse it to a bool
1637     }
1638 
1639     /**
1640      * Returns the hash of a ProcessingInstruction
1641      *
1642      * You should rarely need to call this function. It exists so that
1643      * ProcessingInstructions can be used as associative array keys.
1644      */
1645     override size_t toHash() scope const nothrow { return hash(content); }
1646 
1647     /**
1648      * Returns a string representation of this ProcessingInstruction
1649      */
1650     override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }
1651 
1652     override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
1653 }
1654 
1655 /**
1656  * Abstract base class for XML items
1657  */
1658 abstract class Item
1659 {
1660     /// Compares with another Item of same type for equality
1661     abstract override bool opEquals(scope const Object o) @safe const;
1662 
1663     /// Compares with another Item of same type
1664     abstract override int opCmp(scope const Object o) @safe const;
1665 
1666     /// Returns the hash of this item
1667     abstract override size_t toHash() @safe scope const;
1668 
1669     /// Returns a string representation of this item
1670     abstract override string toString() @safe scope const;
1671 
1672     /**
1673      * Returns an indented string representation of this item
1674      *
1675      * Params:
1676      *      indent = number of spaces by which to indent child elements
1677      *          (not used by this base implementation)
1678      */
1679     string[] pretty(uint indent) @safe scope const
1680     {
1681         import std.string : strip;
1682         string s = strip(toString()); // an item that is only whitespace yields no lines
1683         return s.length == 0 ? [] : [ s ];
1684     }
1685     /// Returns true if the item represents empty XML text
1686     abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
1687 }
1688 
1689 /**
1690  * Class for parsing an XML Document.
1691  *
1692  * This is a subclass of ElementParser. Most of the useful functions are
1693  * documented there.
1694  *
1695  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1696  *
1697  * Bugs:
1698  *      Currently only supports UTF documents.
1699  *
1700  *      If there is an encoding attribute in the prolog, it is ignored.
1701  *
1702  */
1703 class DocumentParser : ElementParser
1704 {
1705     string xmlText; // copy of the document text; the inherited cursor s is pointed at this field
1706 
1707     /**
1708      * Constructs a DocumentParser.
1709      *
1710      * The input to this function MUST be non-empty, valid XML.
1711      * This is enforced by the function's in contract.
1712      *
1713      * Params:
1714      *      xmlText_ = the entire XML document as text
1715      *
1716      */
1717     this(string xmlText_)
1718     in
1719     {
1720         assert(xmlText_.length != 0);
1721         try
1722         {
1723             // Confirm that the input is valid XML
1724             check(xmlText_);
1725         }
1726         catch (CheckException e)
1727         {
1728             // And if it's not, tell the user why not
1729             assert(false, "\n" ~ e.toString());
1730         }
1731     }
1732     body
1733     {
1734         xmlText = xmlText_;
1735         s = &xmlText; // must be set before super(): the base constructor dereferences s
1736         super();    // Initialize everything
1737         parse();    // Parse through the root tag (but not beyond)
1738     }
1739 }
1740 
1741 @system unittest
1742 {
1743     auto document = new Document("<root><child><grandchild/></child></root>");
1744     assert(document.elements.length == 1);
1745     assert(document.elements[0].tag.name == "child");
1746     assert(document.items == document.elements);
1747 }
1748 
1749 /**
1750  * Class for parsing an XML element.
1751  *
1752  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1753  *
1754  * Note that you cannot construct instances of this class directly. You can
1755  * construct a DocumentParser (which is a subclass of ElementParser), but
1756  * otherwise, instances of ElementParser will be created for you by the
1757  * library, and passed your way via onStartTag handlers.
1758  *
1759  */
1760 class ElementParser
1761 {
1762     alias Handler = void delegate(string); // type of the comment, CDATA, XI, PI and text handlers
1763     alias ElementHandler = void delegate(in Element element); // value type of onEndTag
1764     alias ParserHandler = void delegate(ElementParser parser); // value type of onStartTag
1765 
1766     private
1767     {
1768         Tag tag_;            // tag of the element being parsed; exposed read-only via tag()
1769         string elementStart; // snapshot of *s taken by the protected constructor
1770         string* s;           // pointer to the text being parsed (DocumentParser points it at xmlText)
1771 
1772         Handler commentHandler = null; // set through onComment
1773         Handler cdataHandler = null;   // set through onCData
1774         Handler xiHandler = null;
1775         Handler piHandler = null;      // set through onPI
1776         Handler rawTextHandler = null; // set through onTextRaw
1777         Handler textHandler = null;    // set through onText
1778 
1779         // Private constructor for start tags
1780         this(ElementParser parent) @safe @nogc pure nothrow
1781         {
1782             s = parent.s; // share the parent's cursor; must precede this(), which reads *s
1783             this();
1784             tag_ = parent.tag_;
1785         }
1786 
1787         // Private constructor for empty tags
1788         this(Tag tag, string* t) @safe @nogc pure nothrow
1789         {
1790             s = t; // must precede this(), which reads *s
1791             this();
1792             tag_ = tag;
1793         }
1794     }
1795 
1796     /**
1797      * The Tag at the start of the element being parsed. You can read this to
1798      * determine the tag's name and attributes.
1799      */
1800     @property @safe @nogc pure nothrow const(Tag) tag() const { return this.tag_; }
1801 
1802     /**
1803      * Register a handler which will be called whenever a start tag is
1804      * encountered which matches the specified name. You can also pass null as
1805      * the name, in which case the handler will be called for any unmatched
1806      * start tag.
1807      *
1808      * Example:
1809      * --------------
1810      * // Call this function whenever a <podcast> start tag is encountered
1811      * onStartTag["podcast"] = (ElementParser xml)
1812      * {
1813      *     // Your code here
1814      *     //
1815      *     // This is a closure, so code here may reference
1816      *     // variables which are outside of this scope
1817      * };
1818      *
1819      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1820      * // start tag is encountered
1821      * onStartTag["episode"] = &myEpisodeStartHandler;
1822      *
1823      * // call delegate dg for all other start tags
1824      * onStartTag[null] = dg;
1825      * --------------
1826      *
1827      * This library will supply your function with a new instance of
1828      * ElementParser, which may be used to parse inside the element whose
1829      * start tag was just found, or to identify the tag attributes of the
1830      * element, etc.
1831      *
1832      * Note that your function will be called for both start tags and empty
1833      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1834      * and &lt;br/&gt;.
1835      */
1836     ParserHandler[string] onStartTag;
1837 
1838     /**
1839      * Register a handler which will be called whenever an end tag is
1840      * encountered which matches the specified name. You can also pass null as
1841      * the name, in which case the handler will be called for any unmatched
1842      * end tag.
1843      *
1844      * Example:
1845      * --------------
1846      * // Call this function whenever a </podcast> end tag is encountered
1847      * onEndTag["podcast"] = (in Element e)
1848      * {
1849      *     // Your code here
1850      *     //
1851      *     // This is a closure, so code here may reference
1852      *     // variables which are outside of this scope
1853      * };
1854      *
1855      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1856      * // end tag is encountered
1857      * onEndTag["episode"] = &myEpisodeEndHandler;
1858      *
1859      * // call delegate dg for all other end tags
1860      * onEndTag[null] = dg;
1861      * --------------
1862      *
1863      * Note that your function will be called for both start tags and empty
1864      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1865      * and &lt;br/&gt;.
1866      */
1867     ElementHandler[string] onEndTag;
1868 
1869     protected this() @safe @nogc pure nothrow
1870     {
1871         this.elementStart = *this.s; // s must already be set by the caller
1872     }
1873 
1874     /**
1875      * Register a handler which will be called whenever text is encountered.
1876      *
1877      * Example:
1878      * --------------
1879      * // Call this function whenever text is encountered
1880      * onText = (string s)
1881      * {
1882      *     // Your code here
1883      *
1884      *     // The passed parameter s will have been decoded by the time you see
1885      *     // it, and so may contain any character.
1886      *     //
1887      *     // This is a closure, so code here may reference
1888      *     // variables which are outside of this scope
1889      * };
1890      * --------------
1891      */
1892     @property @safe @nogc pure nothrow void onText(Handler handler) { this.textHandler = handler; }
1893 
1894     /**
1895      * Register an alternative handler which will be called whenever text
1896      * is encountered. This differs from onText in that onText will decode
1897      * the text, whereas onTextRaw will not. This allows you to make design
1898      * choices, since onText will be more accurate, but slower, while
1899      * onTextRaw will be faster, but less accurate. Of course, you can
1900      * still call decode() within your handler, if you want, but you'd
1901      * probably want to use onTextRaw only in circumstances where you
1902      * know that decoding is unnecessary.
1903      *
1904      * Example:
1905      * --------------
1906      * // Call this function whenever text is encountered
1907      * onTextRaw = (string s)
1908      * {
1909      *     // Your code here
1910      *
1911      *     // The passed parameter s will NOT have been decoded.
1912      *     //
1913      *     // This is a closure, so code here may reference
1914      *     // variables which are outside of this scope
1915      * };
1916      * --------------
1917      */
1918     @safe @nogc pure nothrow void onTextRaw(Handler handler) { this.rawTextHandler = handler; }
1919 
1920     /**
1921      * Register a handler which will be called whenever a character data
1922      * segment is encountered.
1923      *
1924      * Example:
1925      * --------------
1926      * // Call this function whenever a CData section is encountered
1927      * onCData = (string s)
1928      * {
1929      *     // Your code here
1930      *
1931      *     // The passed parameter s does not include the opening <![CDATA[
1932      *     // nor closing ]]>
1933      *     //
1934      *     // This is a closure, so code here may reference
1935      *     // variables which are outside of this scope
1936      * };
1937      * --------------
1938      */
1939     @property @safe @nogc pure nothrow void onCData(Handler handler) { this.cdataHandler = handler; }
1940 
1941     /**
1942      * Register a handler which will be called whenever a comment is
1943      * encountered.
1944      *
1945      * Example:
1946      * --------------
1947      * // Call this function whenever a comment is encountered
1948      * onComment = (string s)
1949      * {
1950      *     // Your code here
1951      *
1952      *     // The passed parameter s does not include the opening <!-- nor
1953      *     // closing -->
1954      *     //
1955      *     // This is a a closure, so code here may reference
1956      *     // variables which are outside of this scope
1957      * };
1958      * --------------
1959      */
1960     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1961 
1962     /**
1963      * Register a handler which will be called whenever a processing
1964      * instruction is encountered.
1965      *
1966      * Example:
1967      * --------------
1968      * // Call this function whenever a processing instruction is encountered
1969      * onPI = (string s)
1970      * {
1971      *     // Your code here
1972      *
1973      *     // The passed parameter s does not include the opening <? nor
1974      *     // closing ?>
1975      *     //
1976      *     // This is a a closure, so code here may reference
1977      *     // variables which are outside of this scope
1978      * };
1979      * --------------
1980      */
1981     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1982 
1983     /**
1984      * Register a handler which will be called whenever an XML instruction is
1985      * encountered.
1986      *
1987      * Example:
1988      * --------------
1989      * // Call this function whenever an XML instruction is encountered
1990      * // (Note: XML instructions may only occur preceding the root tag of a
1991      * // document).
1992      * onPI = (string s)
1993      * {
1994      *     // Your code here
1995      *
1996      *     // The passed parameter s does not include the opening <! nor
1997      *     // closing >
1998      *     //
1999      *     // This is a a closure, so code here may reference
2000      *     // variables which are outside of this scope
2001      * };
2002      * --------------
2003      */
2004     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
2005 
2006     /**
2007      * Parse an XML element.
2008      *
2009      * Parsing will continue until the end of the current element. Any items
2010      * encountered for which a handler has been registered will invoke that
2011      * handler.
2012      *
2013      * Throws: various kinds of XMLException
2014      */
2015     void parse()
2016     {
2017         import std.algorithm.searching : startsWith;
2018         import std..string : indexOf;
2019 
2020         string t;
2021         const Tag root = tag_;
2022         Tag[string] startTags;
2023         if (tag_ !is null) startTags[tag_.name] = tag_;
2024 
2025         while (s.length != 0)
2026         {
2027             if (startsWith(*s,"<!--"))
2028             {
2029                 chop(*s,4);
2030                 t = chop(*s,indexOf(*s,"-->"));
2031                 if (commentHandler.funcptr !is null) commentHandler(t);
2032                 chop(*s,3);
2033             }
2034             else if (startsWith(*s,"<![CDATA["))
2035             {
2036                 chop(*s,9);
2037                 t = chop(*s,indexOf(*s,"]]>"));
2038                 if (cdataHandler.funcptr !is null) cdataHandler(t);
2039                 chop(*s,3);
2040             }
2041             else if (startsWith(*s,"<!"))
2042             {
2043                 chop(*s,2);
2044                 t = chop(*s,indexOf(*s,">"));
2045                 if (xiHandler.funcptr !is null) xiHandler(t);
2046                 chop(*s,1);
2047             }
2048             else if (startsWith(*s,"<?"))
2049             {
2050                 chop(*s,2);
2051                 t = chop(*s,indexOf(*s,"?>"));
2052                 if (piHandler.funcptr !is null) piHandler(t);
2053                 chop(*s,2);
2054             }
2055             else if (startsWith(*s,"<"))
2056             {
2057                 tag_ = new Tag(*s,true);
2058                 if (root is null)
2059                     return; // Return to constructor of derived class
2060 
2061                 if (tag_.isStart)
2062                 {
2063                     startTags[tag_.name] = tag_;
2064 
2065                     auto parser = new ElementParser(this);
2066 
2067                     auto handler = tag_.name in onStartTag;
2068                     if (handler !is null) (*handler)(parser);
2069                     else
2070                     {
2071                         handler = null in onStartTag;
2072                         if (handler !is null) (*handler)(parser);
2073                     }
2074                 }
2075                 else if (tag_.isEnd)
2076                 {
2077                     const startTag = startTags[tag_.name];
2078                     string text;
2079 
2080                     if (startTag.tagString.length == 0)
2081                         assert(0);
2082 
2083                     immutable(char)* p = startTag.tagString.ptr
2084                         + startTag.tagString.length;
2085                     immutable(char)* q = &tag_.tagString[0];
2086                     text = decode(p[0..(q-p)], DecodeMode.LOOSE);
2087 
2088                     auto element = new Element(startTag);
2089                     if (text.length != 0) element ~= new Text(text);
2090 
2091                     auto handler = tag_.name in onEndTag;
2092                     if (handler !is null) (*handler)(element);
2093                     else
2094                     {
2095                         handler = null in onEndTag;
2096                         if (handler !is null) (*handler)(element);
2097                     }
2098 
2099                     if (tag_.name == root.name) return;
2100                 }
2101                 else if (tag_.isEmpty)
2102                 {
2103                     Tag startTag = new Tag(tag_.name);
2104 
2105                     // FIX by hed010gy, for bug 2979
2106                     // http://d.puremagic.com/issues/show_bug.cgi?id=2979
2107                     if (tag_.attr.length > 0)
2108                           foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2109                     // END FIX
2110 
2111                     // Handle the pretend start tag
2112                     string s2;
2113                     auto parser = new ElementParser(startTag,&s2);
2114                     auto handler1 = startTag.name in onStartTag;
2115                     if (handler1 !is null) (*handler1)(parser);
2116                     else
2117                     {
2118                         handler1 = null in onStartTag;
2119                         if (handler1 !is null) (*handler1)(parser);
2120                     }
2121 
2122                     // Handle the pretend end tag
2123                     auto element = new Element(startTag);
2124                     auto handler2 = tag_.name in onEndTag;
2125                     if (handler2 !is null) (*handler2)(element);
2126                     else
2127                     {
2128                         handler2 = null in onEndTag;
2129                         if (handler2 !is null) (*handler2)(element);
2130                     }
2131                 }
2132             }
2133             else
2134             {
2135                 t = chop(*s,indexOf(*s,"<"));
2136                 if (rawTextHandler.funcptr !is null)
2137                     rawTextHandler(t);
2138                 else if (textHandler.funcptr !is null)
2139                     textHandler(decode(t,DecodeMode.LOOSE));
2140             }
2141         }
2142     }
2143 
2144     /**
2145      * Returns that part of the element which has already been parsed
2146      */
2147     override string toString() const @nogc @safe pure nothrow
2148     {
2149         assert(elementStart.length >= s.length);
2150         return elementStart[0 .. elementStart.length - s.length];
2151     }
2152 
2153 }
2154 
2155 private
2156 {
2157     template Check(string msg)
2158     {
2159         string old = s;
2160 
2161         void fail() @safe pure
2162         {
2163             s = old;
2164             throw new Err(s,msg);
2165         }
2166 
2167         void fail(Err e) @safe pure
2168         {
2169             s = old;
2170             throw new Err(s,msg,e);
2171         }
2172 
2173         void fail(string msg2) @safe pure
2174         {
2175             fail(new Err(s,msg2));
2176         }
2177     }
2178 
2179     void checkMisc(ref string s) @safe pure // rule 27
2180     {
2181         import std.algorithm.searching : startsWith;
2182 
2183         mixin Check!("Misc");
2184 
2185         try
2186         {
2187                  if (s.startsWith("<!--")) { checkComment(s); }
2188             else if (s.startsWith("<?"))   { checkPI(s); }
2189             else                           { checkSpace(s); }
2190         }
2191         catch (Err e) { fail(e); }
2192     }
2193 
2194     void checkDocument(ref string s) @safe pure // rule 1
2195     {
2196         mixin Check!("Document");
2197         try
2198         {
2199             checkProlog(s);
2200             checkElement(s);
2201             star!(checkMisc)(s);
2202         }
2203         catch (Err e) { fail(e); }
2204     }
2205 
2206     void checkChars(ref string s) @safe pure // rule 2
2207     {
2208         // TO DO - Fix std.utf stride and decode functions, then use those
2209         // instead
2210         import std.format : format;
2211 
2212         mixin Check!("Chars");
2213 
2214         dchar c;
2215         int n = -1;
2216         foreach (i, dchar d; s)
2217         {
2218             if (!isChar(d))
2219             {
2220                 c = d;
2221                 n = cast(int) i;
2222                 break;
2223             }
2224         }
2225         if (n != -1)
2226         {
2227             s = s[n..$];
2228             fail(format("invalid character: U+%04X",c));
2229         }
2230     }
2231 
2232     void checkSpace(ref string s) @safe pure // rule 3
2233     {
2234         import std.algorithm.searching : countUntil;
2235         import std.ascii : isWhite;
2236         import std.utf : byCodeUnit;
2237 
2238         mixin Check!("Whitespace");
2239         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2240         if (i == -1 && s.length > 0 && isWhite(s[0]))
2241             s = s[$ .. $];
2242         else if (i > -1)
2243             s = s[i .. $];
2244         if (s is old) fail();
2245     }
2246 
2247     void checkName(ref string s, out string name) @safe pure // rule 5
2248     {
2249         mixin Check!("Name");
2250 
2251         if (s.length == 0) fail();
2252         int n;
2253         foreach (i, dchar c;s)
2254         {
2255             if (c == '_' || c == ':' || isLetter(c)) continue;
2256             if (i == 0) fail();
2257             if (c == '-' || c == '.' || isDigit(c)
2258                 || isCombiningChar(c) || isExtender(c)) continue;
2259             n = cast(int) i;
2260             break;
2261         }
2262         name = s[0 .. n];
2263         s = s[n..$];
2264     }
2265 
2266     void checkAttValue(ref string s) @safe pure // rule 10
2267     {
2268         import std.algorithm.searching : countUntil;
2269         import std.utf : byCodeUnit;
2270 
2271         mixin Check!("AttValue");
2272 
2273         if (s.length == 0) fail();
2274         char c = s[0];
2275         if (c != '\u0022' && c != '\u0027')
2276             fail("attribute value requires quotes");
2277         s = s[1..$];
2278         for (;;)
2279         {
2280             s = s[s.byCodeUnit.countUntil(c) .. $];
2281             if (s.length == 0) fail("unterminated attribute value");
2282             if (s[0] == '<') fail("< found in attribute value");
2283             if (s[0] == c) break;
2284             try { checkReference(s); } catch (Err e) { fail(e); }
2285         }
2286         s = s[1..$];
2287     }
2288 
2289     void checkCharData(ref string s) @safe pure // rule 14
2290     {
2291         import std.algorithm.searching : startsWith;
2292 
2293         mixin Check!("CharData");
2294 
2295         while (s.length != 0)
2296         {
2297             if (s.startsWith("&")) break;
2298             if (s.startsWith("<")) break;
2299             if (s.startsWith("]]>")) fail("]]> found within char data");
2300             s = s[1..$];
2301         }
2302     }
2303 
2304     void checkComment(ref string s) @safe pure // rule 15
2305     {
2306         import std..string : indexOf;
2307 
2308         mixin Check!("Comment");
2309 
2310         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2311         ptrdiff_t n = s.indexOf("--");
2312         if (n == -1) fail("unterminated comment");
2313         s = s[n..$];
2314         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2315     }
2316 
2317     void checkPI(ref string s) @safe pure // rule 16
2318     {
2319         mixin Check!("PI");
2320 
2321         try
2322         {
2323             checkLiteral("<?",s);
2324             checkEnd("?>",s);
2325         }
2326         catch (Err e) { fail(e); }
2327     }
2328 
2329     void checkCDSect(ref string s) @safe pure // rule 18
2330     {
2331         mixin Check!("CDSect");
2332 
2333         try
2334         {
2335             checkLiteral(cdata,s);
2336             checkEnd("]]>",s);
2337         }
2338         catch (Err e) { fail(e); }
2339     }
2340 
2341     void checkProlog(ref string s) @safe pure // rule 22
2342     {
2343         mixin Check!("Prolog");
2344 
2345         try
2346         {
2347             /* The XML declaration is optional
2348              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2349              */
2350             opt!(checkXMLDecl)(s);
2351 
2352             star!(checkMisc)(s);
2353             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2354         }
2355         catch (Err e) { fail(e); }
2356     }
2357 
2358     void checkXMLDecl(ref string s) @safe pure // rule 23
2359     {
2360         mixin Check!("XMLDecl");
2361 
2362         try
2363         {
2364             checkLiteral("<?xml",s);
2365             checkVersionInfo(s);
2366             opt!(checkEncodingDecl)(s);
2367             opt!(checkSDDecl)(s);
2368             opt!(checkSpace)(s);
2369             checkLiteral("?>",s);
2370         }
2371         catch (Err e) { fail(e); }
2372     }
2373 
2374     void checkVersionInfo(ref string s) @safe pure // rule 24
2375     {
2376         mixin Check!("VersionInfo");
2377 
2378         try
2379         {
2380             checkSpace(s);
2381             checkLiteral("version",s);
2382             checkEq(s);
2383             quoted!(checkVersionNum)(s);
2384         }
2385         catch (Err e) { fail(e); }
2386     }
2387 
2388     void checkEq(ref string s) @safe pure // rule 25
2389     {
2390         mixin Check!("Eq");
2391 
2392         try
2393         {
2394             opt!(checkSpace)(s);
2395             checkLiteral("=",s);
2396             opt!(checkSpace)(s);
2397         }
2398         catch (Err e) { fail(e); }
2399     }
2400 
2401     void checkVersionNum(ref string s) @safe pure // rule 26
2402     {
2403         import std.algorithm.searching : countUntil;
2404         import std.utf : byCodeUnit;
2405 
2406         mixin Check!("VersionNum");
2407 
2408         s = s[s.byCodeUnit.countUntil('\"') .. $];
2409         if (s is old) fail();
2410     }
2411 
2412     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2413     {
2414         mixin Check!("DocTypeDecl");
2415 
2416         try
2417         {
2418             checkLiteral("<!DOCTYPE",s);
2419             //
2420             // TO DO -- ensure DOCTYPE is well formed
2421             // (But not yet. That's one of our "future directions")
2422             //
2423             checkEnd(">",s);
2424         }
2425         catch (Err e) { fail(e); }
2426     }
2427 
2428     void checkSDDecl(ref string s) @safe pure // rule 32
2429     {
2430         import std.algorithm.searching : startsWith;
2431 
2432         mixin Check!("SDDecl");
2433 
2434         try
2435         {
2436             checkSpace(s);
2437             checkLiteral("standalone",s);
2438             checkEq(s);
2439         }
2440         catch (Err e) { fail(e); }
2441 
2442         int n = 0;
2443              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2444         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2445         else fail("standalone attribute value must be 'yes', \"yes\","~
2446             " 'no' or \"no\"");
2447         s = s[n..$];
2448     }
2449 
2450     void checkElement(ref string s) @safe pure // rule 39
2451     {
2452         mixin Check!("Element");
2453 
2454         string sname,ename,t;
2455         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2456 
2457         if (t == "STag")
2458         {
2459             try
2460             {
2461                 checkContent(s);
2462                 t = s;
2463                 checkETag(s,ename);
2464             }
2465             catch (Err e) { fail(e); }
2466 
2467             if (sname != ename)
2468             {
2469                 s = t;
2470                 fail("end tag name \"" ~ ename
2471                     ~ "\" differs from start tag name \""~sname~"\"");
2472             }
2473         }
2474     }
2475 
2476     // rules 40 and 44
2477     void checkTag(ref string s, out string type, out string name) @safe pure
2478     {
2479         mixin Check!("Tag");
2480 
2481         try
2482         {
2483             type = "STag";
2484             checkLiteral("<",s);
2485             checkName(s,name);
2486             star!(seq!(checkSpace,checkAttribute))(s);
2487             opt!(checkSpace)(s);
2488             if (s.length != 0 && s[0] == '/')
2489             {
2490                 s = s[1..$];
2491                 type = "ETag";
2492             }
2493             checkLiteral(">",s);
2494         }
2495         catch (Err e) { fail(e); }
2496     }
2497 
2498     void checkAttribute(ref string s) @safe pure // rule 41
2499     {
2500         mixin Check!("Attribute");
2501 
2502         try
2503         {
2504             string name;
2505             checkName(s,name);
2506             checkEq(s);
2507             checkAttValue(s);
2508         }
2509         catch (Err e) { fail(e); }
2510     }
2511 
2512     void checkETag(ref string s, out string name) @safe pure // rule 42
2513     {
2514         mixin Check!("ETag");
2515 
2516         try
2517         {
2518             checkLiteral("</",s);
2519             checkName(s,name);
2520             opt!(checkSpace)(s);
2521             checkLiteral(">",s);
2522         }
2523         catch (Err e) { fail(e); }
2524     }
2525 
2526     void checkContent(ref string s) @safe pure // rule 43
2527     {
2528         import std.algorithm.searching : startsWith;
2529 
2530         mixin Check!("Content");
2531 
2532         try
2533         {
2534             while (s.length != 0)
2535             {
2536                 old = s;
2537                      if (s.startsWith("&"))        { checkReference(s); }
2538                 else if (s.startsWith("<!--"))     { checkComment(s); }
2539                 else if (s.startsWith("<?"))       { checkPI(s); }
2540                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2541                 else if (s.startsWith("</"))       { break; }
2542                 else if (s.startsWith("<"))        { checkElement(s); }
2543                 else                               { checkCharData(s); }
2544             }
2545         }
2546         catch (Err e) { fail(e); }
2547     }
2548 
2549     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2550     {
2551         import std.format : format;
2552 
2553         mixin Check!("CharRef");
2554 
2555         c = 0;
2556         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2557         int radix = 10;
2558         if (s.length != 0 && s[0] == 'x')
2559         {
2560             s = s[1..$];
2561             radix = 16;
2562         }
2563         if (s.length == 0) fail("unterminated character reference");
2564         if (s[0] == ';')
2565             fail("character reference must have at least one digit");
2566         while (s.length != 0)
2567         {
2568             immutable char d = s[0];
2569             int n = 0;
2570             switch (d)
2571             {
2572                 case 'F','f': ++n;      goto case;
2573                 case 'E','e': ++n;      goto case;
2574                 case 'D','d': ++n;      goto case;
2575                 case 'C','c': ++n;      goto case;
2576                 case 'B','b': ++n;      goto case;
2577                 case 'A','a': ++n;      goto case;
2578                 case '9':     ++n;      goto case;
2579                 case '8':     ++n;      goto case;
2580                 case '7':     ++n;      goto case;
2581                 case '6':     ++n;      goto case;
2582                 case '5':     ++n;      goto case;
2583                 case '4':     ++n;      goto case;
2584                 case '3':     ++n;      goto case;
2585                 case '2':     ++n;      goto case;
2586                 case '1':     ++n;      goto case;
2587                 case '0':     break;
2588                 default: n = 100; break;
2589             }
2590             if (n >= radix) break;
2591             c *= radix;
2592             c += n;
2593             s = s[1..$];
2594         }
2595         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2596         if (s.length == 0 || s[0] != ';') fail("expected ;");
2597         else s = s[1..$];
2598     }
2599 
2600     void checkReference(ref string s) @safe pure // rule 67
2601     {
2602         import std.algorithm.searching : startsWith;
2603 
2604         mixin Check!("Reference");
2605 
2606         try
2607         {
2608             dchar c;
2609             if (s.startsWith("&#")) checkCharRef(s,c);
2610             else checkEntityRef(s);
2611         }
2612         catch (Err e) { fail(e); }
2613     }
2614 
2615     void checkEntityRef(ref string s) @safe pure // rule 68
2616     {
2617         mixin Check!("EntityRef");
2618 
2619         try
2620         {
2621             string name;
2622             checkLiteral("&",s);
2623             checkName(s,name);
2624             checkLiteral(";",s);
2625         }
2626         catch (Err e) { fail(e); }
2627     }
2628 
2629     void checkEncName(ref string s) @safe pure // rule 81
2630     {
2631         import std.algorithm.searching : countUntil;
2632         import std.ascii : isAlpha;
2633         import std.utf : byCodeUnit;
2634 
2635         mixin Check!("EncName");
2636 
2637         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2638         if (s is old) fail();
2639         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2640     }
2641 
2642     void checkEncodingDecl(ref string s) @safe pure // rule 80
2643     {
2644         mixin Check!("EncodingDecl");
2645 
2646         try
2647         {
2648             checkSpace(s);
2649             checkLiteral("encoding",s);
2650             checkEq(s);
2651             quoted!(checkEncName)(s);
2652         }
2653         catch (Err e) { fail(e); }
2654     }
2655 
2656     // Helper functions
2657 
2658     void checkLiteral(string literal,ref string s) @safe pure
2659     {
2660         import std..string : startsWith;
2661 
2662         mixin Check!("Literal");
2663 
2664         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2665         s = s[literal.length..$];
2666     }
2667 
2668     void checkEnd(string end,ref string s) @safe pure
2669     {
2670         import std..string : indexOf;
2671         // Deliberately no mixin Check here.
2672 
2673         auto n = s.indexOf(end);
2674         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2675         s = s[n..$];
2676         checkLiteral(end,s);
2677     }
2678 
2679     // Metafunctions -- none of these use mixin Check
2680 
2681     void opt(alias f)(ref string s)
2682     {
2683         try { f(s); } catch (Err e) {}
2684     }
2685 
2686     void plus(alias f)(ref string s)
2687     {
2688         f(s);
2689         star!(f)(s);
2690     }
2691 
2692     void star(alias f)(ref string s)
2693     {
2694         while (s.length != 0)
2695         {
2696             try { f(s); }
2697             catch (Err e) { return; }
2698         }
2699     }
2700 
2701     void quoted(alias f)(ref string s)
2702     {
2703         import std..string : startsWith;
2704 
2705         if (s.startsWith("'"))
2706         {
2707             checkLiteral("'",s);
2708             f(s);
2709             checkLiteral("'",s);
2710         }
2711         else
2712         {
2713             checkLiteral("\"",s);
2714             f(s);
2715             checkLiteral("\"",s);
2716         }
2717     }
2718 
2719     void seq(alias f,alias g)(ref string s)
2720     {
2721         f(s);
2722         g(s);
2723     }
2724 }
2725 
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // The checkers advance s as they consume input, so keep the complete
    // document around: line and column numbers must be computed relative
    // to it, also for the "junk" error raised after a successful parse.
    immutable string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2752 
// check() must reject a document whose end tag name differs from its start
// tag name and must mention both names in the resulting report.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2801 
// A small well-formed document with a comment between child elements must
// pass check(); any CheckException fails the test with its report.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        assert(0, e.toString());
    }
}
2821 
// Attribute values may contain the quote character which is not used as
// their delimiter; the start-tag handler must see the values unchanged.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    // Guard against the handler never having been invoked.
    assert(tested);
}
2841 
// Entity references must be decoded both in attribute values (seen by the
// start-tag handler) and in element text (seen by the end-tag handler).
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}
2860 
// A Document built from an empty-element tag with escaped attribute
// content must serialise back to exactly the same text.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}
2867 
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Creates the exception with the given message.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2870 
2871 // Other exceptions
2872 
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2876 
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2880 
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2884 
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2888 
/// Thrown during Text constructor
class TextException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2892 
/// Thrown during decode()
class DecodeException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2896 
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2900 
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Only this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2904 
/**
 * Thrown during check()
 *
 * Instances form a chain through err; toString() renders the whole chain,
 * one line per entry.
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    // Input remaining at the point of failure; used by complete() to
    // derive line and column.
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    // Params:
    //      tail = input remaining at the point of failure
    //      msg  = production name or specific error message
    //      err  = the failure which caused this one, if any
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    // Compute line and column for this entry and for every entry reachable
    // through err. entire must be the complete document, of which tail is
    // the trailing part.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        string head = entire[0..$-tail.length];
        // Start of the line on which the failure occurred (0 when there is
        // no preceding newline, as lastIndexOf then yields -1).
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Count columns in code points rather than code units.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /// Renders the failure chain, entries reachable through err first.
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}
2953 
// Module-private shorthand for CheckException (used, for example, as the
// type of the CheckException constructor's err parameter).
private alias Err = CheckException;
2955 
2956 // Private helper functions
2957 
2958 private
2959 {
2960     inout(T) toType(T)(inout Object o)
2961     {
2962         T t = cast(T)(o);
2963         if (t is null)
2964         {
2965             throw new InvalidTypeException("Attempt to compare a "
2966                 ~ T.stringof ~ " with an instance of another type");
2967         }
2968         return t;
2969     }
2970 
2971     string chop(ref string s, size_t n) @safe pure nothrow
2972     {
2973         if (n == -1) n = s.length;
2974         string t = s[0 .. n];
2975         s = s[n..$];
2976         return t;
2977     }
2978 
2979     bool optc(ref string s, char c) @safe pure nothrow
2980     {
2981         immutable bool b = s.length != 0 && s[0] == c;
2982         if (b) s = s[1..$];
2983         return b;
2984     }
2985 
2986     void reqc(ref string s, char c) @safe pure
2987     {
2988         if (s.length == 0 || s[0] != c) throw new TagException("");
2989         s = s[1..$];
2990     }
2991 
2992     char requireOneOf(ref string s, string chars) @safe pure
2993     {
2994         import std..string : indexOf;
2995 
2996         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2997             throw new TagException("");
2998         immutable char ch = s[0];
2999         s = s[1..$];
3000         return ch;
3001     }
3002 
3003     size_t hash(string s,size_t h=0) @trusted nothrow
3004     {
3005         return typeid(s).getHash(&s) + h;
3006     }
3007 
    // Definitions from the XML specification
    //
    // CharTable is a flat list of inclusive [low, high] code point pairs in
    // ascending order: even indices hold the low bound and the following odd
    // index holds the high bound. That is the layout lookup() searches.
    // NOTE(review): the name suggests the spec's Char production - confirm.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    // Flat list of inclusive [low, high] code point pairs in ascending order
    // (low bound at even indices, high bound at the following odd index).
    // NOTE(review): the name suggests the XML specification's BaseChar
    // production - confirm against the spec.
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    // Flat list of inclusive [low, high] code point pairs in ascending order.
    // NOTE(review): the name suggests the XML specification's Ideographic
    // production - confirm against the spec.
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    // Flat list of inclusive [low, high] code point pairs in ascending order
    // (low bound at even indices, high bound at the following odd index).
    // NOTE(review): the name suggests the XML specification's CombiningChar
    // production - confirm against the spec.
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    // Flat list of inclusive [low, high] code point pairs in ascending order.
    // NOTE(review): the name suggests the XML specification's Digit
    // production - confirm against the spec.
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    // Flat list of inclusive [low, high] code point pairs in ascending order.
    // NOTE(review): the name suggests the XML specification's Extender
    // production - confirm against the spec.
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];
3080 
3081     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3082     {
3083         while (table.length != 0)
3084         {
3085             auto m = (table.length >> 1) & ~1;
3086             if (c < table[m])
3087             {
3088                 table = table[0 .. m];
3089             }
3090             else if (c > table[m+1])
3091             {
3092                 table = table[m+2..$];
3093             }
3094             else return true;
3095         }
3096         return false;
3097     }
3098 
3099     string startOf(string s) @safe nothrow pure
3100     {
3101         string r;
3102         foreach (char c;s)
3103         {
3104             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3105             if (r.length >= 40) { r ~= "___"; break; }
3106         }
3107         return r;
3108     }
3109 
3110     void exit(string s=null)
3111     {
3112         throw new XMLException(s);
3113     }
3114 }
3115 
/**
 * An attribute presented through the Element interface.
 *
 * The name and value given at construction are stored and returned
 * unchanged by name() and value(); the owning element is recorded as
 * the parent.
 */
final class Attribute : Element
{
    private alias tstring = string;

    private tstring name_;
    private tstring value_;

    /**
     * Params:
     *   name   = attribute name (also passed to the Element constructor)
     *   value  = attribute value
     *   parent = element the attribute belongs to
     */
    this (tstring name, tstring value, Element parent)
    {
        super(name);
        this.name_ = name;
        this.value_ = value;
        this.parent = parent;
    }

    /// Returns the attribute name given at construction.
    override tstring name ()
    {
        return this.name_;
    }

    /// Returns the attribute value given at construction.
    override tstring value ()
    {
        return this.value_;
    }
}
3140 
/**
 * Returns a newly built array holding every entry of self.elements,
 * each cast to Element.
 */
Element[] children(Element self)
{
    import std.algorithm : map;
    import std.array : array;

    auto asElements = map!(node => cast(Element) node)(self.elements);
    return array(asElements);
}
3148 
/**
 * Wraps every attribute of self's tag in an Attribute object.
 *
 * Returns: one Attribute per entry of self.tag.attr, each with self as
 *          its parent, in the iteration order of that container.
 */
Attribute[] attributes (Element self)
{
    // Reserve capacity instead of allocating a full array and slicing it to
    // [0 .. 0]: appending to such a slice reallocates, so the original
    // allocation was never used.
    Attribute[] attrs;
    attrs.reserve(self.tag.attr.length);

    foreach (k, v ; self.tag.attr)
        attrs ~= new Attribute(k, v, self);

    return attrs;
}
3159 
/**
 * Identity operation: hands back the element it was given.
 */
Element query (Element self)
{
    Element same = self;
    return same;
}
3164 
/**
 * Sets (or overwrites) an attribute on self's tag.
 *
 * Params:
 *   self   = element to modify
 *   prefix = accepted but not used by this function
 *   name   = attribute name, used as the key in self.tag.attr
 *   value  = attribute value; defaults to null
 * Returns: self, so calls can be chained.
 */
Element attribute (Element self, string prefix, string name, string value = null)
{
    // prefix is deliberately left untouched here; only name is used as the key.
    self.tag.attr[name] = value;
    return self;
}