more.sdl source code

1 /**
2    $(P An SDL ($(LINK2 http://www.ikayzo.org/display/SDL/Home,Simple Declarative Language)) parser.
3      Supports StAX/SAX style API. See $(D more.std.dom) for DOM style API.)
4 
5    Examples:
6    --------------------------------
7    void printTags(char[] sdl) {
8        Tag tag;
9        while(parseSdlTag(&tag, &sdl)) {
10            writefln("(depth %s) tag '%s' values '%s' attributes '%s'",
11                tag.depth, tag.name, tag.values.data, tag.attributes.data);
12        }
13    }
14 
15    struct Person {
16        string name;
17        ushort age;
18        string[] nicknames;
19        auto children = appender!(Person[])();
20        void reset() {
21            name = null;
22            age = 0;
23            nicknames = null;
24            children.clear();
25        }
26        void parseFromSdl(ref SdlWalker walker) {
27            tag.enforceNoValues();
28            tag.enforceNoAttributes();
29            reset();
30            for(auto personWalker = walker.children();
31                    !personWalker.empty; personWalker.popFront) {
32 
33                if(tag.name == "name") {
34 
35                    tag.enforceNoAttributes();
36                    tag.enforceNoChildren();
37                    tag.getOneValue(name);
38 
39                } else if(tag.name == "age") {
40 
41                    tag.enforceNoAttributes();
42                    tag.enforceNoChildren();
43                    tag.getOneValue(age);
44 
45                } else if(tag.name == "nicknames") {
46 
47                    tag.enforceNoAttributes();
48                    tag.enforceNoChildren();
49                    tag.getValues(nicknames);
50 
51                } else if(tag.name == "child") {
52 
53 
54 
55                    // todo implement
56 
57                } else tag.throwIsUnknown();
58            }
59        }
60        void validate() {
61            if(name == null) throw new Exception("person is missing the 'name' tag");
62            if(age == 0) throw new Exception("person is missing the 'age' tag");
63        }
64    }
65 
66    void parseTags(char[] sdl) {
67        struct Person {
68            string name;
69            ushort age;
70            string[] nicknames;
71            Person[] children;
72            void reset() {
73                name = null;
74                age = 0;
75                nicknames = null;
76                children = null;
77            }
78            void validate() {
79                if(name == null) throw new Exception("person is missing the 'name' tag");
80                if(age == 0) throw new Exception("person is missing the 'age' tag");
81            }
82        }
83        auto people = appender!(Person[])();
84        Person person;
85 
86        Tag tag;
87        for(auto walker = SdlWalker(&tag, sdl); !walker.empty; walker.popFront) {
88            if(tag.name == "person") {
89 
90                tag.enforceNoValues();
91                tag.enforceNoAttributes();
92                person.reset();
93                person.validate();
94                people.put(person);
95 
96            } else tag.throwIsUnknown();
97        }
98    }
99 
100    --------------------------------
101    TODO: implement escaped strings
102    TODO: finish unit tests
103    TODO: write an input-range sdl parser
104    TODO: implement datetime/timespans
105 
106    Authors: Jonathan Marler, johnnymarler@gmail.com
107    License: use freely for any purpose
108  */
109 
110 module more.sdl;
111 
112 import core.stdc..string : memmove;
113 
114 import std.array;
115 import std..string;
116 import std.range;
117 import std.conv;
118 import std.bitmanip;
119 import std.traits;
120 
121 import more.common;
122 import more.utf8;
123 
124 version(unittest)
125 {
126   import std.stdio;
127 }
128 
129 alias LineNumber = size_t;
130 
/// Used in SdlParseException to distinguish specific sdl parse errors.
enum SdlErrorType {
  /// No specific category. This is the value SdlParseException stores when it is
  /// constructed without an explicit error type.
  unknown,
  /// NOTE(review): presumably reported when a tag's '{' is found on a later line than
  /// the tag itself (compare the invalidBraceFmt message) -- confirm against the parser.
  braceAfterNewline,
  /// NOTE(review): presumably reported when a value appears after an attribute
  /// (compare the mixedValuesAndAttributesFmt message) -- confirm against the parser.
  mixedValuesAndAttributes,
}
/// Exception raised by the sdl parse functions when the input is not valid SDL.
class SdlParseException : Exception
{
  /// Category of the failure; SdlErrorType.unknown when the caller did not give one.
  SdlErrorType type;
  /// Line of the SDL text the error refers to. A value of 0 means "no line" and
  /// suppresses the "line N: " prefix in the exception message.
  LineNumber lineInSdl;

  /// Creates an exception of type SdlErrorType.unknown.
  this(LineNumber lineInSdl, string msg, string file = __FILE__, size_t codeLine = __LINE__)
  {
    this(SdlErrorType.unknown, lineInSdl, msg, file, codeLine);
  }

  /// Creates an exception of the given type. The message is prefixed with
  /// "line N: " unless lineInSdl is 0.
  this(SdlErrorType errorType, LineNumber lineInSdl, string msg, string file = __FILE__, size_t codeLine = __LINE__)
  {
    super((lineInSdl != 0) ? "line "~to!string(lineInSdl)~": "~msg : msg, file, codeLine);
    this.lineInSdl = lineInSdl;
    this.type = errorType;
  }
}
151 
/// Holds three character arrays for an SDL attribute, namespace/id/value.
/// Tag.toString and Tag.toSdl write an attribute as `namespace:id=value`, leaving out
/// the `namespace:` part when the namespace is empty.
struct Attribute {
  //size_t line, column;
  /// Optional namespace of the attribute; empty when the attribute has none.
  const(char)[] namespace;
  /// The attribute's identifier (the text written before '=').
  const(char)[] id;
  /// The attribute's value literal (the text written after '=').
  const(char)[] value;
}
159 
/// Contains a tag's name, values and attributes.
/// It does not contain any information about its child tags because that part of the sdl
/// would not have been parsed yet; however, it does indicate if the tag was followed by
/// an open brace.
/// This struct is used directly for the StAX/SAX APIs and indirectly for the DOM or Reflection APIs.
struct Tag {

  // A bitfield of flags used to pass extra options to parseSdlTag.
  // Used to accept/reject different types of SDL or cause parseSdlTag to
  // behave differently like preventing it from modifying the sdl text.
  // Bits in use: 1, 2, 4, 8, 16 and 128 (see the properties below).
  private ubyte flags;

  /// Normally SDL only allows a tag's attributes to appear after all its values.
  /// This flag causes parseSdlTag to allow values/attributes to appear in any order, i.e.
  ///     $(D tag attr="my-value" "another-value" # would be valid)
  @property @safe bool allowMixedValuesAndAttributes() pure nothrow const { return (flags & 1U) != 0;}
  @property @safe void allowMixedValuesAndAttributes(bool v) pure nothrow { if (v) flags |= 1U;else flags &= ~1U;}

  /// Causes parseSdlTag to allow a tag's open brace to appear after any number of newlines
  @property @safe bool allowBraceAfterNewline() pure nothrow const        { return (flags & 2U) != 0;}
  @property @safe void allowBraceAfterNewline(bool v) pure nothrow        { if (v) flags |= 2U;else flags &= ~2U;}

  /// Causes parseSdlTag to allow child tags to appear on the same line as the parent's
  /// open and close braces
  @property @safe bool allowChildTagsOnSameLineAsBrace() pure nothrow const { return (flags & 4U) != 0;}
  @property @safe void allowChildTagsOnSameLineAsBrace(bool v) pure nothrow { if (v) flags |= 4U;else flags &= ~4U;}

  // Causes parseSdlTag to throw an exception if it finds any number literals
  // with postfix letters indicating the type.
  // NOTE: bit 4U is now used by allowChildTagsOnSameLineAsBrace, so this option needs
  //       a different bit before it can be re-enabled.
  // @property @safe bool verifyTypedNumbers() pure nothrow const            { return (flags & 4U) != 0;}
  // @property @safe void verifyTypedNumbers(bool v) pure nothrow            { if (v) flags |= 4U;else flags &= ~4U;}

  /// Causes parseSdlTag to set the tag name to null instead of "content" for anonymous tags.
  /// This allows the application to differentiate between "content" tags and anonymous tags.
  @property @safe bool anonymousTagNameIsNull() pure nothrow const        { return (flags & 8U) != 0;}
  @property @safe void anonymousTagNameIsNull(bool v) pure nothrow        { if (v) flags |= 8U;else flags &= ~8U;}

  /// Causes parseSdlTag to accept non-quoted strings
  @property @safe bool acceptUnquotedStrings() pure nothrow const        { return (flags & 16U) != 0;}
  @property @safe void acceptUnquotedStrings(bool v) pure nothrow        { if (v) flags |= 16U;else flags &= ~16U;}

  /// Prevents parseSdlTag from modifying the given sdl text for things such as
  /// processing escaped strings
  @property @safe bool preserveSdlText() pure nothrow const               { return (flags & 128U) != 0;}
  @property @safe void preserveSdlText(bool v) pure nothrow               { if (v) flags |= 128U;else flags &= ~128U;}


  // TODO: maybe add an option to specify that any values accessed should be copied to new buffers
  // NOTE: Do not add an option to prevent parseSdlTag from throwing exceptions when the input has ended.
  //       It may have been useful for an input buffered object, however, the buffered input object will
  //       need to know when it has a full tag anyway so the sdl will already contain the characters to end the tag.
  //       Or in the case of braces on the next line, if the tag has a lot of whitespace until the actual end-of-tag
  //       delimiter, the buffered input reader can insert a semi-colon or open_brace to signify the end of the tag
  //       earlier.


  /// For now an alias for useStrictSdl. Use this function if you want your code to always use
  /// the default mode whatever it may become.
  alias useStrictSdl useDefaultSdl;

  /// This is the default mode.
  /// $(OL
  ///   $(LI Causes parseSdlTag to throw SdlParseException if a tag's open brace appears after a newline)
  ///   $(LI Causes parseSdlTag to throw SdlParseException if any tag value appears after any tag attribute)
  ///   $(LI Causes parseSdlTag to reject child tags on the same line as the parent's braces)
  ///   $(LI Causes parseSdlTag to set anonymous tag names to "content")
  ///   $(LI Causes parseSdlTag to reject non-quoted strings)
  /// )
  /// The preserveSdlText option is left untouched.
  void useStrictSdl() {
    this.allowMixedValuesAndAttributes = false;
    this.allowBraceAfterNewline = false;
    this.allowChildTagsOnSameLineAsBrace = false;
    this.anonymousTagNameIsNull = false;
    this.acceptUnquotedStrings = false;
  }
  /// $(OL
  ///   $(LI Causes parseSdlTag to allow a tag's open brace to appear after any number of newlines)
  ///   $(LI Causes parseSdlTag to allow tag values and attributes to be mixed in any order)
  ///   $(LI Causes parseSdlTag to allow child tags on the same line as the parent's braces)
  ///   $(LI Causes parseSdlTag to set anonymous tag names to "content")
  ///   $(LI Causes parseSdlTag to accept non-quoted strings)
  /// )
  /// The preserveSdlText option is left untouched.
  void useLooseSdl() {
    this.allowMixedValuesAndAttributes = true;
    this.allowBraceAfterNewline = true;
    this.allowChildTagsOnSameLineAsBrace = true;
    this.anonymousTagNameIsNull = false;
    this.acceptUnquotedStrings = true;
  }
  /// $(OL
  ///   $(LI Causes parseSdlTag to allow a tag's open brace to appear after any number of newlines)
  ///   $(LI Causes parseSdlTag to allow tag values and attributes to be mixed in any order)
  ///   $(LI Causes parseSdlTag to allow child tags on the same line as the parent's braces)
  ///   $(LI Causes parseSdlTag to set anonymous tag names to null)
  ///   $(LI Causes parseSdlTag to accept non-quoted strings)
  /// )
  /// The preserveSdlText option is left untouched.
  void useProposedSdl() {
    this.allowMixedValuesAndAttributes = true;
    this.allowBraceAfterNewline = true;
    this.allowChildTagsOnSameLineAsBrace = true;
    this.anonymousTagNameIsNull = true;
    this.acceptUnquotedStrings = true;
  }

  /// The depth of the tag, all root tags are at depth 0.
  size_t depth = 0;

  /// The line number of the SDL parser after parsing this tag.
  size_t line;
  size_t column;

  /// The namespace of the tag
  const(char)[] namespace;
  /// The name of the tag
  const(char)[] name;
  /// The values of the tag
  auto values     = appender!(const(char)[][])();
  /// The attributes of the tag
  auto attributes = appender!(Attribute[])();
  /// Indicates the tag has an open brace
  bool hasOpenBrace;

  version(unittest)
  {
    // This function is only so unit tests can create Tags to compare
    // with tags parsed from the parseSdlTag function. This constructor
    // should never be called in production code.
    //
    // name may be "namespace:name". Every item of values that looks like
    // `id=...` or `namespace:id=...` (starting with an ID-start character) is stored
    // as an attribute; everything else is stored as a value.
    this(const(char)[] name, const(char)[][] values...) {
      auto colonIndex = name.indexOf(':');
      if(colonIndex > -1) {
        this.namespace = name[0..colonIndex];
        this.name = name[colonIndex+1..$];
      } else {
        this.namespace.length = 0;
        this.name = name;
      }
      foreach(value; values) {

        const(char)[] attributeNamespace = "";
        size_t idStart = 0;                // index of the first character of the attribute id
        size_t equalIndex = size_t.max;    // stays size_t.max when the item is a plain value

        // check if it is an attribute
        if(value.length && isIDStart(value[0])) {
          for(size_t i = 1; i < value.length; i++) {
            auto c = value[i];
            if(isID(c)) continue;
            if(c == ':') {
              if(attributeNamespace.length) throw new Exception("contained 2 colons?");
              attributeNamespace = value[0..i];
              idStart = i + 1; // the id begins after the colon, not at it
              continue;
            }
            if(c == '=') {
              equalIndex = i;
            }
            break;
          }
        }

        if(equalIndex == size_t.max) {
          this.values.put(value);
        } else {
          this.attributes.put(Attribute(attributeNamespace, value[idStart..equalIndex], value[equalIndex+1..$]));
        }

      }
    }
  }

  /// Gets the tag ready to parse a new sdl tree by resetting the depth and the line number.
  /// It is unnecessary to call this before parsing the first sdl tree but would not be harmful.
  /// It does not reset the namespace/name/values/attributes because those will
  /// be reset by the parser on the next call to parseSdlTag when it calls $(D resetForNextTag()).
  void resetForNewSdl() {
    depth = 0;
    line = 1;
  }

  /// Resets the tag state to get ready to parse the next tag.
  /// Should only be called by the parseSdlTag function.
  /// This will clear the namespace/name/values/attributes and increment the depth if the current tag
  /// had an open brace.
  void resetForNextTag()
  {
    this.namespace.length = 0;
    this.name = null;
    if(hasOpenBrace) {
      hasOpenBrace = false;
      this.depth++;
    }
    this.values.clear();
    this.attributes.clear();
  }

  /// Returns true if the tag is anonymous, i.e. its name is null when
  /// anonymousTagNameIsNull is set, or "content" otherwise.
  bool isAnonymous()
  {
    return anonymousTagNameIsNull ? this.name is null : this.name == "content";
  }
  /// Sets the tag as anonymous (name null or "content" depending on anonymousTagNameIsNull).
  void setIsAnonymous()
  {
    this.name = anonymousTagNameIsNull ? null : "content";
  }
  /// Returns: true if the tag namespaces/names/values/attributes are
  ///          the same even if the depth/line/options are different.
  bool opEquals(ref Tag other) {
    return
      namespace == other.namespace &&
      name == other.name &&
      values.data == other.values.data &&
      attributes.data == other.attributes.data;
  }

  /// Returns: A string of the Tag not including its children, written as
  ///          `namespace:name value... attrNamespace:id=value...`.
  ///          The name is left out for an anonymous tag that has values or attributes.
  ///          The open brace is never included; use toSdl for that.
  string toString() {
    string str = "";
    if(namespace.length) {
      str ~= namespace;
      str ~= ':'; // separator between namespace and name, same as toSdl
    }
    if(!isAnonymous || (values.data.length == 0 && attributes.data.length == 0)) {
      str ~= name;
    }
    foreach(value; values.data) {
      str ~= ' ';
      str ~= value;
    }
    foreach(attribute; attributes.data) {
      str ~= ' ';
      if(attribute.namespace.length) {
        str ~= attribute.namespace;
        str ~= ':';
      }
      str ~= attribute.id;
      str ~= '=';
      str ~= attribute.value;
    }
    return str;
  }

  /// Writes the tag as standard SDL to sink, indented by one indent per depth level
  /// and terminated by a newline.
  /// It will write the open brace '{' but since the tag does not have any knowledge
  /// about its children, it is up to the caller to write the close brace '}' after it
  /// writes the children to the sink.
  void toSdl(S, string indent = "    ")(S sink) if(isOutputRange!(S,const(char)[])) {
    //writefln("[DEBUG] converting to sdl namespace=%s name=%s values=%s attr=%s",
    //namespace, name, values.data, attributes.data);
    foreach(level; 0 .. depth) {
      sink.put(indent);
    }
    if(namespace.length) {
      sink.put(namespace);
      sink.put(":");
    }
    if(!isAnonymous || (values.data.length == 0 && attributes.data.length == 0)) {
      if(namespace.length == 0 && isSdlKeyword(name)) {
        sink.put(":"); // Escape tag names that are keywords
      }
      sink.put(name);
    }
    foreach(value; values.data) {
      sink.put(" ");
      sink.put(value);
    }
    foreach(attribute; attributes.data) {
      sink.put(" ");
      if(attribute.namespace.length) {
        sink.put(attribute.namespace);
        sink.put(":");
      }
      sink.put(attribute.id);
      sink.put("=");
      sink.put(attribute.value);
    }
    if(hasOpenBrace) {
      sink.put(" {\n");
    } else {
      sink.put("\n");
    }
  }




  //
  // User Methods
  //

  /// Throws: SdlParseException saying the current tag name is unknown.
  void throwIsUnknown() {
    throw new SdlParseException(line, format("unknown tag '%s'", name));
  }
  /// Throws: SdlParseException saying the current tag appeared more than once.
  void throwIsDuplicate() {
    throw new SdlParseException(line, format("tag '%s' appeared more than once", name));
  }

  /// Converts the tag's single value to T and stores it in value.
  /// For string types, a non-empty value is treated as "already set" and reported as a duplicate.
  /// Throws: SdlParseException if the tag does not have exactly one value, if a string
  ///         value was already set, or if the literal cannot be converted to T.
  void getOneValue(T)(ref T value) {
    if(values.data.length != 1) {
      throw new SdlParseException
        (line,format("tag '%s' %s 1 value but had %s",
                     name, (values.data.length == 0) ? "must have at least" : "can only have", values.data.length));
    }

    const(char)[] literal = values.data[0];

    static if( isSomeString!T ) {

      if(!value.empty) throwIsDuplicate();

    }
    // Numeric types have no duplicate check: 0 cannot be told apart from "not set yet".
    //if( value != 0 ) throwIsDuplicate();

    if(!sdlLiteralToD!(T)(literal, value)) throw new SdlParseException(line, format("cannot convert '%s' to %s", literal, typeid(T)));
  }

  /// Converts all of the tag's values to T and stores them in t.
  /// If t is null a new array is allocated; otherwise the values are appended when
  /// allowAppend is true, and the tag is reported as a duplicate when it is false.
  /// For string types every value must be a quoted string literal; the quotes are removed.
  /// Params:
  ///   t = the array receiving the converted values
  ///   minCount = the minimum number of values the tag must have
  /// Throws: SdlParseException if there are fewer than minCount values, if t was already
  ///         set and allowAppend is false, or if a literal cannot be converted to T.
  void getValues(T, bool allowAppend=false)(ref T[] t, size_t minCount = 1) {
    if(values.data.length < minCount) throw new SdlParseException(line, format("tag '%s' must have at least %s value(s)", name, minCount));

    size_t arrayOffset;
    if(t.ptr is null) {
      arrayOffset = 0;
      t = new T[values.data.length];
    } else if(allowAppend) {
      arrayOffset = t.length;
      t.length += values.data.length;
    } else throwIsDuplicate();

    foreach(literal; values.data) {
      static if( isSomeString!T ) {
        // Require both quotes so that stripping them can never slice out of range.
        if(literal.length < 2 || literal[0] != '"' || literal[$-1] != '"')
          throw new SdlParseException(line, format("tag '%s' must only have string literals but had another literal type", name));
      }
      if(!sdlLiteralToD!(T)(literal, t[arrayOffset]))
        throw new SdlParseException(line, format("cannot convert '%s' to %s", literal, typeid(T)));
      arrayOffset++;
    }
  }


  /// Throws: SdlParseException if the tag has any values.
  void enforceNoValues() {
    if(values.data.length) throw new SdlParseException(line, format("tag '%s' cannot have any values", name));
  }
  /// Throws: SdlParseException if the tag has any attributes.
  void enforceNoAttributes() {
    if(attributes.data.length) throw new SdlParseException(line, format("tag '%s' cannot have any attributes", name));
  }
  /// Throws: SdlParseException if the tag was followed by an open brace.
  void enforceNoChildren() {
    if(hasOpenBrace) throw new SdlParseException(line, format("tag '%s' cannot have any children", name));
  }


}
513 
514 version = use_lookup_tables;
515 
/// Returns: true if token is exactly one of the SDL keyword literals
///          null, true, false, on or off (case-sensitive).
bool isSdlKeyword(const char[] token) {
  static immutable string[5] keywords = ["null", "true", "false", "on", "off"];
  foreach(keyword; keywords) {
    if(token == keyword) return true;
  }
  return false;
}
/// Returns: true if c may start an SDL identifier: an ASCII letter or an underscore.
/// Note: the ranges are compared directly instead of using the lookup table because
///       the table did not seem to be as fast for this check.
bool isIDStart(dchar c) {
  switch(c) {
    case 'a': .. case 'z':
    case 'A': .. case 'Z':
    case '_':
      return true;
    default:
      return false;
  }
}
/// Returns: true if c may appear inside an SDL identifier: an ASCII letter, a digit,
///          or one of '_', '-', '.', '$'.
/// With use_lookup_tables the answer is read from the sdlIDFlag bit of sdlLookup;
/// characters beyond the table are never identifier characters.
bool isID(dchar c) {
  version(use_lookup_tables) {
    if(c >= sdlLookup.length) return false;
    return (sdlLookup[c] & sdlIDFlag) != 0;
  } else {
    switch(c) {
      case 'a': .. case 'z':
      case 'A': .. case 'Z':
      case '0': .. case '9':
      case '_':
      case '-':
      case '.':
      case '$':
        return true;
      default:
        return false;
    }
  }
}
542 enum tooManyEndingBraces = "too many ending braces '}'";
543 enum noEndingQuote = "string missing ending quote";
544 enum invalidBraceFmt = "found '{' on a different line than its tag '%s'.  fix the sdl by moving '{' to the same line";
545 enum mixedValuesAndAttributesFmt = "SDL values cannot appear after attributes, bring '%s' in front of the attributes for tag '%s'";
546 enum notEnoughCloseBracesFmt = "reached end of sdl but missing %s close brace(s) '}'";
547 
548 
/// Skeleton of a range-based SDL parser; the parsing itself is not implemented yet.
/// NOTE(review): the intended roles of buffer, allocator and leftover are not visible
///               in this file -- the constructor only stores buffer and allocator.
struct SdlParser(A)
{
  char[] buffer;
  A allocator;
  char[] leftover;

  /// Stores the given buffer and allocator; leftover starts out empty.
  this(char[] buffer, A allocator)
  {
    this.buffer = buffer;
    this.allocator = allocator;
  }

  /// Not implemented. Calling it always fails with an assertion error.
  /// (The body used to be empty, which made every instantiation fail to compile
  /// because the function is declared to return a value.)
  ref Sink parse(Source,Sink)(Source source, Sink sink)
    if (isInputRange!Source &&
        isOutputRange!(Sink, ElementType!Source))
  {
    // todo implement
    assert(0, "SdlParser.parse is not implemented");
  }
}
566 
567 
568 
/// Converts literal to the given D type T.
/// This is a wrapper around the $(D sdlLiteralToD) overload that returns true on success, except
/// this function returns the value itself and throws an SdlParseException on error.
/// The exception carries line number 0 (no line information is available here), so its
/// message has no "line N: " prefix.
/// Throws: SdlParseException if literal cannot be converted to T.
T sdlLiteralToD(T)(const(char)[] literal) {
  T value;
  if(!sdlLiteralToD!(T)(literal, value))
    throw new SdlParseException(0, format("failed to convert '%s' to a %s", literal, typeid(T)));
  return value;
}
578 
/// Converts literal to the given D type T.
/// $(UL
///   $(LI bool: accepts true/on/1 and false/off/0.)
///   $(LI strings: removes the surrounding quotes if both are present; the result is a
///        slice of literal, not a copy.)
///   $(LI integral/floating point: trailing non-digit characters (type postfixes) are
///        removed before the conversion.)
///   $(LI any other type: converted with $(D std.conv.to).)
/// )
/// Params:
///   literal = the SDL literal text
///   t = receives the converted value; left unchanged on failure
/// Returns: true on success, false on failure (including an empty literal)
bool sdlLiteralToD(T)(const(char)[] literal, ref T t) {

  if(literal.length == 0) return false;

  static if( is( T == bool) ) {

    if(literal == "true" || literal == "on" || literal == "1") { t = true; return true; }
    if(literal == "false" || literal == "off" || literal == "0") { t = false; return true; }
    return false; // not a boolean literal

  } else static if( isSomeString!T ) {

    if(literal[0] == '"' && literal.length > 1 && literal[$-1] == '"') {
      t = cast(T)literal[1..$-1];
    } else {
      t = cast(T)literal;
    }
    return true;

  } else static if( isIntegral!T || isFloatingPoint!T ) {

    // remove any postfix characters
    while(true) {
      char c = literal[$-1];
      if(c >= '0' && c <= '9') break;
      literal = literal[0..$-1];
      if(literal.length == 0) return false;
    }

    // to!T reports malformed or out-of-range numbers by throwing; this function
    // is documented to report failure through its return value instead.
    try {
      t = to!T(literal);
    } catch(ConvException) {
      return false;
    }
    return true;

  } else {

    try {
      t = to!T(literal);
    } catch(ConvException) {
      return false;
    }
    return true;

  }
}
620 
621 
622 
623 
624 
/// Builds the text of associative-style array initializers `'c': initializer` for every
/// character from min to max inclusive, separated by ",\n" (no trailing separator).
/// The entry for max is always emitted, even when min is not below max.
string arrayRange(char min, char max, string initializer) {
  string entries;
  char current = min;
  while(current < max) {
    entries ~= "'"~current~"': "~initializer~",\n";
    current++;
  }
  return entries ~ "'"~max~"': "~initializer;
}
/// Builds the text of an array literal from (range, initializer) string pairs.
/// Each range is either a single quoted character such as `'a'` or an inclusive span
/// such as `'a'-'z'`; every character it covers is mapped to the paired initializer.
/// The result starts with "[" and ends with "]".
string rangeInitializers(string[] s...) {
  immutable hasUnpairedArgument = (s.length % 2) != 0;
  if(hasUnpairedArgument) assert(0, "must supply an even number of arguments to rangeInitializers");
  //assert(0, "["~rangeInitializersCurrent(s)); // uncomment to see the code
  return "["~rangeInitializersCurrent(s);
}
/// Emits the initializer text for the first (range, initializer) pair of s and then
/// continues with the remaining pairs through rangeInitializersNext.
/// Supported range forms:
///   `'c'` or an escaped character such as `'\t'`  -- emitted as `'c':initializer`
///   `'a'-'z'`                                     -- expanded with arrayRange
/// Throws: Exception if s holds fewer than two strings or the range has any other form.
string rangeInitializersCurrent(string[] s) {
  if(s.length < 2) throw new Exception("rangeInitializersCurrent requires a range followed by its initializer");

  string range = s[0];
  if(range.length == 0 || range[0] != '\'') {
    throw new Exception(format("range '%s' not supported", range));
  }

  // single character: 'c' or an escape sequence like '\t'
  if(range.length == 3 || (range.length == 4 && range[1] == '\\')) {
    if(range[$-1] != '\'') throw new Exception(format("a single-character range %s started with an apostrophe (') but did not end with one", range));
    return range ~ ":" ~ s[1] ~ rangeInitializersNext(s);
  }

  // character span: exactly 'x'-'y' (the bounds are read from indices 1 and 5)
  if(range.length != 7 || range[2] != '\'' || range[3] != '-' || range[4] != '\'' || range[6] != '\'') {
    throw new Exception(format("range '%s' not supported", range));
  }
  char min = range[1];
  char max = range[5];
  return arrayRange(min, max, s[1]) ~ rangeInitializersNext(s);
}
/// Continues the array literal after the first pair of s has been emitted:
/// closes it with "]" when no further pair follows, otherwise emits ",\n" and the
/// initializers for the pairs starting at s[2].
string rangeInitializersNext(string[] s...) {
  return (s.length > 2) ? ",\n"~rangeInitializersCurrent(s[2..$]) : "]";
}
657 
658 
// Bit flags stored per character in the sdlLookup table.
/// Set for characters that may appear inside an identifier (tested by isID).
enum ubyte sdlIDFlag                  = 0x01;
/// Set for characters treated as part of a number literal (tested by parseSdlTag's isNumber).
enum ubyte sdlNumberFlag              = 0x02;
/// Set for the letters accepted as number type postfixes (tested by parseSdlTag's isNumberPostfix).
enum ubyte sdlNumberPostfixFlag       = 0x04;
/// Set for characters that may directly follow a tag item (tested by parseSdlTag's isValidAfterTagItem).
enum ubyte sdlValidAfterTagItemFlag   = 0x08;
663 
// Character classification table, indexed by character code (0-255).
// The table source is generated at compile time by rangeInitializers from the
// (range, flags) pairs below and mixed in as the initializer of sdlLookup.
// Characters that are not listed keep the default value 0 (no flags).
version(use_lookup_tables) {
  mixin("private __gshared ubyte[256] sdlLookup = "~rangeInitializers
        ("'_'"    , "sdlIDFlag",

         // lower-case letters; b, d, f and l double as number postfix letters
         `'a'`    , "sdlIDFlag",
         `'b'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'c'`    , "sdlIDFlag",
         `'d'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'e'`    , "sdlIDFlag",
         `'f'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'g'-'k'`, "sdlIDFlag",
         `'l'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'m'-'z'`, "sdlIDFlag",

         // upper-case letters; B, D, F and L double as number postfix letters
         `'A'`    , "sdlIDFlag",
         `'B'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'C'`    , "sdlIDFlag",
         `'D'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'E'`    , "sdlIDFlag",
         `'F'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'G'-'K'`, "sdlIDFlag",
         `'L'`    , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag",
         `'M'-'Z'`, "sdlIDFlag",

         // digits and the remaining identifier punctuation
         `'0'-'9'`, "sdlIDFlag | sdlNumberFlag",
         `'-'`    , "sdlIDFlag",
         `'.'`    , "sdlIDFlag | sdlNumberFlag",
         `'$'`    , "sdlIDFlag",

         // whitespace
         `' '`    , "sdlValidAfterTagItemFlag",
         `'\t'`   , "sdlValidAfterTagItemFlag",
         `'\n'`   , "sdlValidAfterTagItemFlag",
         `'\v'`   , "sdlValidAfterTagItemFlag",
         `'\f'`   , "sdlValidAfterTagItemFlag",
         `'\r'`   , "sdlValidAfterTagItemFlag",

         // braces, tag terminator, backslash and comment-start characters
         `'{'`    , "sdlValidAfterTagItemFlag",
         `'}'`    , "sdlValidAfterTagItemFlag",
         `';'`    , "sdlValidAfterTagItemFlag",
         `'\\'`    , "sdlValidAfterTagItemFlag",
         `'/'`    , "sdlValidAfterTagItemFlag",
         `'#'`    , "sdlValidAfterTagItemFlag",


         )~";");
}
710 
/// A convenience function to parse a single tag.
/// Calls $(D tag.resetForNewSdl) and then calls $(D parseSdlTag).
/// Params:
///   tag = receives the parsed tag
///   sdlText = the sdl text to parse; slices stored in tag point into this text
/// Throws: SdlParseException if sdlText does not contain any tag, in addition to
///         whatever $(D parseSdlTag) throws.
void parseOneSdlTag(Tag* tag, char[] sdlText) {
  tag.resetForNewSdl();
  // parseSdlTag moves the front of the slice it is given, so hand it a copy of the
  // slice and keep sdlText intact for the error message.
  char[] remaining = sdlText;
  if(!parseSdlTag(tag, &remaining)) throw new SdlParseException(tag.line, format("The sdl text '%s' did not contain any tags", sdlText));
}
717 
718 /// Parses one SDL tag (not including its children) from sdlText saving slices for every
719 /// name/value/attribute to the given tag struct.
720 /// This function assumes that sdlText contains at least one full SDL _tag.
721 /// The only time this function will allocate memory is if the value/attribute appenders
722 /// in the tag struct are not large enough to hold all the values.
723 /// Because of this, after the tag values/attributes are populated, it is up to the caller to copy
724 /// any memory they wish to save unless sdlText is going to persist in memory.
725 /// Note: this function does not handle the UTF-8 bom because it doesn't make sense to re-check
726 ///       for the BOM before every tag.
727 /// Params:
728 ///   tag = An address to a Tag structure to save the sdl tag's name/values/attributes.
729 ///   sdlText = An address to the sdl text character array.
730 ///             the function will move the front of the slice foward past
731 ///             any sdl that was parsed.
732 /// Returns: true if a tag was found, false otherwise
733 /// Throws: SdlParseException or Utf8Exception
734 bool parseSdlTag(Tag* tag, char[]* sdlText)
735 {
736   // developer note:
737   //   whenever reading the next character, the next pointer must be saved to cpos
738   //   if the character could be used later, but if the next is guaranteed to
739   //   be thrown away (such as when skipping till the next newline after a comment)
740   //   then cpos does not need to be saved.
741   char *next = (*sdlText).ptr;
742   char *limit = next + sdlText.length;
743 
744 
745   tag.resetForNextTag(); // make sure this is done first
746 
747   char* cpos;
748   dchar c;
749   char[] tokenNamespace;
750   char[] attributeID;
751   char[] token;
752 
753   void enforceNoMoreTags() {
754     if(tag.depth > 0) throw new SdlParseException(tag.line, format(notEnoughCloseBracesFmt, tag.depth));
755   }
756 
757   void readNext()
758   {
759     cpos = next;
760     c = decodeUtf8(&next, limit);
761   }
762 
763   bool isIDStart() {
764     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
765 /+
766     The lookup table actually seems to be slower in this case
767     version(use_lookup_tables) {
768       return (c < sdlLookup.length) ? ((sdlLookup[c] & idStartFlag) != 0) : false;
769     } else {
770       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
771     }
772 +/
773   }
774   bool isID() {
775     version(use_lookup_tables) {
776       return c < sdlLookup.length && ((sdlLookup[c] & sdlIDFlag) != 0);
777     } else {
778       return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c == '.' || c == '$';
779     }
780   }
781   bool isValidAfterTagItem() {
782     version(use_lookup_tables) {
783       return c < sdlLookup.length && ((sdlLookup[c] & sdlValidAfterTagItemFlag) != 0);
784     } else {
785       implement("isValidAfterTagItem without lookup table");
786     }
787   }
788   bool isNumber() {
789     version(use_lookup_tables) {
790       return c < sdlLookup.length && ((sdlLookup[c] & sdlNumberFlag) != 0);
791     } else {
792       implement("isNumber without lookup table");
793     }
794   }
795   bool isNumberPostfix() {
796     version(use_lookup_tables) {
797       return c < sdlLookup.length && ((sdlLookup[c] & sdlNumberPostfixFlag) != 0);
798     } else {
799       implement("isNumberPostfix without lookup table");
800     }
801   }
802 
803   // expected c/cpos to b pointing at a character before newline, so will ready first
804   // before checking for newlines
805   void toNextLine()
806   {
807     while(true) {
808       if(next >= limit) { return; }
809       c = decodeUtf8(&next, limit); // no need to save cpos since c will be thrown away
810       if(c == '\n') { tag.line++; return; }
811     }
812   }
813 
814   // expects c/cpos to point at the first character of the id and for it to already be checked
815   // when this function is done, c/cpos will pointing to the first character after the id, or
816   // cpos == limit if there are no characters after the id
817   void parseID()
818   {
819     while(true) {
820       if(next >= limit) { cpos = limit; return; }
821       readNext();
822       if(!isID()) return;
823     }
824   }
825 
826 
827   // Expects c/cpos to point at the first character after the id.
828   // Returns: true if the id startOfID[0..cpos-startOfID] spells on/off/null/true/false
829   // NOTE: this should only be called if no namespace was found yet. This function does not
830   //       inspect c itself, so the caller is responsible for treating an id followed by ':'
831   //       as a namespace even if the namespace could be a value like null or true/false
832   bool currentIDIsValue(char* startOfID) {
833     // switch on the length of the id
834     switch(cpos - startOfID) {
835     case 0: .. case 1: return false; // was "case 0-1", i.e. the single constant -1
836     case 2: return startOfID[0..2] == "on";
837     case 3: return startOfID[0..3] == "off";
838     case 4: return startOfID[0..4] == "null" ||
839                    startOfID[0..4] == "true";
840     case 5: return startOfID[0..5] == "false";
841     default: return false; // longer than any keyword (or a negative length)
842     }
843   }
844 
845   // Skips whitespace, '#', '--' and '//' line comments and '/* ... */' block comments.
846   // Returns: true if a newline was found
847   // ExpectedState:
848   //   c/cpos: points to the first character of the potential whitespace/comment
849   // ReturnState:
850   //   c/cpos: points to the first character after all the whitespace/comments, or
851   //           cpos == limit when only whitespace/comments remained; next: just past c
852   bool skipWhitespaceAndComments()
853   {
854     LineNumber lineBefore = tag.line;
855     while(true) {
856       // TODO: maybe use a lookup table here
857       if(c == ' ' || c == '\t' || c =='\v' || c == '\f' || c == '\r') {
858
859         // do nothing (check first as this is the most likely case)
860
861       } else if(c == '\n') {
862
863         tag.line++;
864
865       } else if(c == '#') {
866
867         toNextLine();
868
869       } else if(c == '-' || c == '/') {
870
871         if(next >= limit) return tag.line > lineBefore;
872         auto afterFirst = next; // remembered so a non-comment lookahead can be un-read
873         dchar secondChar = decodeUtf8(&next, limit);
874
875         if(secondChar == c) { // '--' or '//'
876
877           toNextLine();
878
879         } else if(c == '/' && secondChar == '*') { // only '/*' opens a block comment, '-*' does not
880
881         MULTILINE_COMMENT_LOOP:
882           while(next < limit) {
883
884             c = decodeUtf8(&next, limit); // no need to save cpos since c will be thrown away
885             if(c == '\n') {
886               tag.line++;
887             } else if(c == '*') {
888               // this loop assumes c is a '*' and next is pointing to the next character
889               while(next < limit) {
890
891                 c = decodeUtf8(&next, limit);
892                 if(c == '/') break MULTILINE_COMMENT_LOOP;
893                 if(c == '\n') tag.line++;
894                 // anything but another '*' (a newline included) ends the run of stars,
895                 // so "*\n/" is not mistaken for the closing "*/"
896                 if(c != '*') break;
897               }
898             }
899           }
900
901         } else {
902           next = afterFirst; // un-read the lookahead: it belongs to the token that starts at c
903           return tag.line > lineBefore;
904         }
905
906       } else {
907         return tag.line > lineBefore;
908       }
909
910       //
911       // Goto next character
912       //
913       if(next >= limit) {cpos = next; break;}
914       readNext();
915     }
916
917     return tag.line > lineBefore;
918   }
919 
920 
921   // Tries to parse one token (id, namespace:id, attribute name, keyword, string or number).
922   // When atFirstToken is set, only tokens starting with an ID are parsed, except for the
923   // literal that follows an attribute's '=' (sets tokenNamespace/attributeID if found).
924   // Expected State: tokenNamespace/attributeID are empty, c/cpos: first character of the token
925   // Return State:
926   //   If it finds a token, will set tokenNamespace and/or token with values
927   //   c/cpos: points to the next character after the literal (cpos == limit at end of input)
928   //   next: character after cpos
929   void tryParseToken(bool atFirstToken) {
930     token.length = 0; // clear the previous token
931
932     // Handles
933     //  tag name
934     //  tag namespace/ tag name
935     //  unquoted string
936     //  attribute
937     //  attribute with namespace, does not handle the value after the attribute
938     if(isIDStart() || c == ':') { // colon for empty namespaces
939       auto start = cpos;
940
941       if(c != ':') {
942         while(true) {
943           cpos = next;
944           if(next >= limit) { token = start[0..next-start]; return; } // id runs to the end of input
945           c = decodeUtf8(&next, limit);
946           if(!isID()) break;
947         }
948       }
949
950       // Handle Namespaces
951       if(c == ':') {
952
953         tokenNamespace = start[0..next-start]; // include the colon so the calling function
954                                                // can differentiate between the empty namespace
955                                                // and no namespace
956         cpos = next;
957         if(next >= limit) { token.length = 0; return; }
958         c = decodeUtf8(&next, limit);
959
960         if(!isIDStart()) {
961           if(atFirstToken) {
962             if(!isValidAfterTagItem()) throw new SdlParseException
963                                          (tag.line, format("Character '%s' cannot appear after the tag's namespace", c));
964             return; // namespace only: token stays empty
965           }
966           throw new SdlParseException(tag.line,
967                                       format("expected alphanum or '_' after an attribute namespace colon, but got '%s'", c));
968         }
969
970         start = cpos;
971         while(true) {
972           cpos = next;
973           if(next >= limit) { token = start[0..next-start]; return; }
974           c = decodeUtf8(&next, limit);
975           if(!isID()) break;
976         }
977
978       }
979
980       // Handle attributes
981       if(c != '=') {
982         if(!isValidAfterTagItem()) throw new SdlParseException
983                                      (tag.line, format("Character '%s' cannot appear after tag item", c));
984         token = start[0..cpos-start]; // cpos is on the terminator, whatever its encoded width
985         return;
986       }
987
988       // Setup to parse the value
989       attributeID = start[0..cpos-start]; // cpos is on the '=', which is excluded
990       if(next >= limit) throw new SdlParseException
991                           (tag.line, format("sdl cannot end with '=' character"));
992       cpos = next;
993       c = decodeUtf8(&next, limit);
994     }
995
996
997     // Handles: keywords and,
998     //          unquoted strings in attribute values
999     if(isIDStart()) {
1000       auto start = cpos;
1001
1002       while(true) {
1003         cpos = next;
1004         if(next >= limit) { token = start[0..next-start]; return; }
1005         c = decodeUtf8(&next, limit);
1006         if(!isID()) break;
1007       }
1008
1009       if(!isValidAfterTagItem()) throw new SdlParseException
1010                                    (tag.line, format("Character '%s' cannot appear after a tag item", c));
1011
1012       token = start[0..cpos-start]; // cpos is on the terminator, whatever its encoded width
1013       return;
1014     }
1015
1016     if(atFirstToken && attributeID.length == 0) return; // the value after an attribute's '=' may be any literal
1017
1018     if(c == '"') {
1019
1020       bool containsEscapes = false;
1021
1022       while(true) {
1023
1024         if(next >= limit) throw new SdlParseException(tag.line, noEndingQuote);
1025         c = decodeUtf8(&next, limit); // cpos is left on the opening quote on purpose
1026         if(c == '"') break;
1027         if(c == '\\') {
1028           containsEscapes = true;
1029           if(next >= limit) throw new SdlParseException(tag.line, noEndingQuote);
1030           // NOTE TODO: remember to handle escaped newlines
1031           c = decodeUtf8(&next, limit); // skip the escaped character
1032         } else if(c == '\n') {
1033           throw new SdlParseException(tag.line, noEndingQuote);
1034         }
1035
1036       }
1037
1038       if(containsEscapes) {
1039
1040         /* do something different if immutable */
1041         implement("escaped strings");
1042
1043       } else {
1044         token = cpos[0..next - cpos]; // the token keeps both surrounding quotes
1045       }
1046
1047       cpos = next;
1048       if( next < limit) c = decodeUtf8(&next, limit);
1049
1050     } else if(c == '`') {
1051
1052       implement("tick strings");
1053
1054     } else if(c >= '0' && c <= '9' || c == '-' || c == '.') {
1055
1056       auto start = cpos;
1057
1058       while(true) {
1059         cpos = next;
1060         if(next >= limit) { token = start[0..cpos-start]; break; }
1061         c = decodeUtf8(&next, limit);
1062         if(!isNumber()) { token = start[0..cpos-start]; break; }
1063
1064         //if(tag.rejectTypedNumbers && isNumberPostfix())
1065         //throw new SdlParseException(tag.line, "using this sdl mode, postfix characters indicating the type after a number are not allowed");
1066       }
1067
1068     } else if(c == '\'') {
1069
1070       implement("sing-quoted characters");
1071
1072     }
1073   }
1074 
1075   //
1076   // Read the first character
1077   //
1078   if(next >= limit) { enforceNoMoreTags(); goto RETURN_NO_TAG; }
1079   readNext();
1080 
1081   while(true) {
1082 
1083     skipWhitespaceAndComments();
1084     if(cpos >= limit) { enforceNoMoreTags(); goto RETURN_NO_TAG; }
1085 
1086     //
1087     //
1088     // Get the tag name/namespace
1089     //
1090     tokenNamespace.length = 0;
1091     attributeID.length = 0;
1092     tryParseToken(true);
1093 
1094     if(token.length || tokenNamespace.length) {
1095 
1096       if(attributeID.length) {
1097 
1098         tag.setIsAnonymous();
1099 
1100         if(tokenNamespace.length) tokenNamespace = tokenNamespace[0..$-1]; // Remove ending ':' on namespace
1101         Attribute attribute = {tokenNamespace, attributeID, token};
1102         tag.attributes.put(attribute);
1103 
1104       } else {
1105 
1106         if(tokenNamespace.length) {
1107           tag.namespace = tokenNamespace[0..$-1];
1108           if(token.length) {
1109             tag.name = token;
1110           } else {
1111             tag.setIsAnonymous();
1112             if(tag.namespace.length == 0)
1113               throw new SdlParseException(tag.line, "A tag cannot have an empty namespace and an empty name");
1114           }
1115         } else {
1116 
1117           if(isSdlKeyword(token)) {
1118             tag.setIsAnonymous();
1119             tag.values.put(token);
1120           } else {
1121             tag.name = token;
1122           }
1123 
1124         }
1125 
1126       }
1127 
1128     } else if(c == '}') {
1129 
1130       if(tag.depth == 0) throw new SdlParseException(tag.line, tooManyEndingBraces);
1131       tag.depth--;
1132 
1133       // Read the next character
1134       if(next >= limit) { enforceNoMoreTags(); goto RETURN_NO_TAG; }
1135       cpos = next;
1136       c = decodeUtf8(&next, limit);
1137 
1138       continue;
1139 
1140     } else if(c == '\\') {
1141       throw new SdlParseException(tag.line, "expected tag or '}' but got backslash '\\'");
1142     } else if(c == '{') {
1143       throw new SdlParseException(tag.line, "expected tag or '}' but got '{'");
1144     } else if(c == ':') {
1145       throw new SdlParseException(tag.line, "expected tag or '}' but got ':'");
1146     } else {
1147 
1148       tag.namespace.length = 0;
1149       tag.setIsAnonymous();
1150 
1151     }
1152 
1153     //
1154     //
1155     // Found a valid tag, now get values and attributes
1156     //
1157     //
1158   GET_VALUES_AND_ATTRIBUTES:
1159     while(true) {
1160 
1161       // At the beginning of this loop, it is expected that c/cpos will be pointing at the
1162       // next character after the last thing (tag/value/attribute)
1163 
1164       if(cpos >= limit) goto RETURN_TAG; // I may not need this check
1165       auto foundNewline = skipWhitespaceAndComments();
1166       if(cpos >= limit) goto RETURN_TAG;
1167       if(foundNewline) {
1168 
1169         // check if it is a curly brace to either print a useful error message
1170         // or determine if the tag has children
1171         if(c != '{') {
1172           next = cpos; // rewind so whatever character it is will
1173                        // be parsed again on the next call
1174           goto RETURN_TAG;
1175         }
1176         if(tag.allowBraceAfterNewline) {
1177           tag.hasOpenBrace = true;
1178           goto RETURN_TAG;
1179         }
1180 
1181         throw new SdlParseException(SdlErrorType.braceAfterNewline, tag.line,
1182                                     format(invalidBraceFmt, tag.name));
1183       }
1184 
1185       //
1186       // At this point c must contain a non-whitespace character
1187       // and we must have already parsed the tag name
1188       //
1189 
1190       if(c == ';') goto RETURN_TAG;
1191 
1192       //
1193       // Handle the '\' character to escape newlines
1194       //
1195       if(c == '\\') {
1196         if(next >= limit) goto RETURN_TAG; // (check to make sure ending an sdl file with a backslash is ok)
1197         c = decodeUtf8(&next, limit);
1198 
1199         foundNewline = skipWhitespaceAndComments();
1200         if(cpos >= limit) goto RETURN_TAG;
1201         if(!foundNewline) throw new SdlParseException(tag.line, "only comments/whitespace can follow a backslash '\\'");
1202 
1203         continue;
1204       }
1205 
1206       if(c == '{') {
1207         tag.hasOpenBrace = true; // depth will be incremented at the next parse
1208         goto RETURN_TAG;
1209       }
1210 
1211       if(c == '}') {
1212         if(tag.depth == 0) throw new SdlParseException(tag.line, tooManyEndingBraces);
1213         next = cpos; // rewind so the '}' will be seen on the next call and
1214                      // the depth will change on the next call
1215         goto RETURN_TAG;
1216       }
1217 
1218       //
1219       // Try to parse an attribute
1220       //
1221       tokenNamespace.length = 0;
1222       attributeID.length = 0;
1223       tryParseToken(false);
1224       if(token.length || tokenNamespace.length) {
1225 
1226         if(attributeID.length > 0) {
1227           if(tokenNamespace.length) tokenNamespace = tokenNamespace[0..$-1]; // Remove ending ':' on namespace
1228           Attribute attribute = {tokenNamespace, attributeID, token};
1229           tag.attributes.put(attribute);
1230         } else {
1231 
1232           if(tokenNamespace.length)
1233             throw new SdlParseException(tag.line, format("Found a tag value with a namespace '%s%s'?", tokenNamespace, token));
1234 
1235           if(tag.attributes.data.length) {
1236             if(!tag.allowMixedValuesAndAttributes)
1237               throw new SdlParseException(SdlErrorType.mixedValuesAndAttributes, tag.line,
1238                                           format(mixedValuesAndAttributesFmt, token, tag.name));
1239           }
1240 
1241           tag.values.put(token);
1242         }
1243 
1244         if(cpos >= limit) goto RETURN_TAG;
1245 
1246       } else {
1247 
1248         if(attributeID.length > 0) throw new SdlParseException(tag.line, "expected sdl literal to follow attribute '=' but was not a literal");
1249 
1250         if(c == '\0') throw new Exception("possible code bug: found null");
1251         throw new Exception(format("Unhandled character '%s' (code=0x%x)", c, cast(uint)c));
1252 
1253       }
1254     }
1255 
1256   }
1257 
1258   assert(0);
1259 
1260  RETURN_TAG:
1261   *sdlText = next[0..limit-next];
1262   return true;
1263 
1264  RETURN_NO_TAG:
1265   (*sdlText) = limit[0..0];
1266   return false;
1267 }
1268 
1269 version(unittest)
1270 {
1271   char[2048] sdlBuffer; // scratch area the sdl under test may be copied into
1272   char[sdlBuffer.length] sdlBuffer2; // backing store for SdlBuffer2Sink
1273   char[] setupSdlText(const(char[]) sdlText, bool copySdl)
1274   {
1275     if(!copySdl) return cast(char[])sdlText; // hands back the caller's memory with const cast away
1276     immutable len = sdlText.length;
1277     if(len >= sdlBuffer.length) throw new Exception(format("attempting to copy sdl of length %s but sdlBuffer is only of length %s", len, sdlBuffer.length));
1278     sdlBuffer[0..len] = sdlText;
1279     return sdlBuffer[0..len];
1280   }
1281
1282   struct SdlBuffer2Sink // output sink that appends into sdlBuffer2
1283   {
1284     size_t offset; // number of characters written so far
1285     @property
1286     char[] slice() { return sdlBuffer2[0..offset]; }
1287     void put(inout(char)[] value) {
1288       immutable end = offset + value.length; sdlBuffer2[offset..end] = value;
1289       offset = end;
1290     }
1291   }
1292
1293 }
1294 
1295 
1296 unittest
1297 {
1298   //return; // Uncomment to disable these tests
1299 
1300   mixin(scopedTest!"SdlParse");
1301 
1302   Tag parsedTag;
1303 
1304   void useProposed() { // puts parsedTag into the proposed sdl mode
1305     debug { writefln("[TEST] SdlMode: Proposed"); }
1306     parsedTag.useProposedSdl();
1307   }
1308   void useStrict() { // puts parsedTag into the strict sdl mode
1309     debug { writefln("[TEST] SdlMode: Strict"); }
1310     parsedTag.useStrictSdl();
1311   }
1312   struct SdlTest // one sdl text together with the tags it is expected to produce
1313   {
1314     bool copySdl; // the constructor always leaves this false
1315     string sdlText;
1316     Tag[] expectedTags;
1317     size_t line; // source line of the test, defaults to the construction site
1318     this(string sdlText, Tag[] expectedTags, size_t line = __LINE__) {
1319       this.line = line;
1320       this.expectedTags = expectedTags;
1321       this.sdlText = sdlText;
1322       this.copySdl = false;
1323     }
1324   }
1325 
1326   void testParseSdl(bool reparse = true)(bool copySdl, const(char)[] sdlText, Tag[] expectedTags = [], size_t line = __LINE__)
1327   {
1328     size_t previousDepth = size_t.max; // size_t.max means "no tag has been emitted yet"
1329     SdlBuffer2Sink buffer2Sink;
1330     // Parses sdlText with parseSdlTag and asserts that exactly expectedTags come out, in order.
1331     auto escapedSdlText = escape(sdlText);
1332     // With reparse, the parsed tags are written back out as sdl and that text is tested again.
1333     debug {
1334       static if(reparse) {
1335         writefln("[TEST] testing sdl              '%s'", escapedSdlText);
1336       } else {
1337         writefln("[TEST] testing sdl (regenerated)'%s'", escapedSdlText);
1338       }
1339     }
1340
1341     char[] next = setupSdlText(sdlText, copySdl); // remaining unparsed text, advanced by parseSdlTag
1342
1343     parsedTag.resetForNewSdl();
1344
1345
1346     try {
1347
1348       for(auto i = 0; i < expectedTags.length; i++) {
1349         if(!parseSdlTag(&parsedTag, &next)) {
1350           writefln("Expected %s tag(s) but only got %s", expectedTags.length, i);
1351           writefln("Error: test on line %s", line);
1352           assert(0);
1353         }
1354
1355         static if(reparse) {
1356           if(previousDepth != size_t.max) {
1357             while(previousDepth > parsedTag.depth) { // close the braces of the tags we left
1358               buffer2Sink.put("}");
1359               previousDepth--;
1360             }
1361           }
1362         }
1363
1364         auto expectedTag = expectedTags[i];
1365         if(parsedTag.namespace != expectedTag.namespace) {
1366           writefln("Error: expected tag namespace '%s' but got '%s'", expectedTag.namespace, parsedTag.namespace);
1367           writefln("Error: test on line %s", line);
1368           assert(0);
1369         }
1370         if(parsedTag.name != expectedTag.name) {
1371           writefln("Error: expected tag name '%s' but got '%s'", expectedTag.name, parsedTag.name);
1372           writefln("Error: test on line %s", line);
1373           assert(0);
1374         }
1375         //writefln("[DEBUG] expected value '%s', actual values '%s'", expectedTag.values.data, parsedTag.values.data);
1376         if(parsedTag.values.data != expectedTag.values.data) {
1377           writefln("Error: expected tag values '%s' but got '%s'", expectedTag.values.data, parsedTag.values.data);
1378           writefln("Error: test on line %s", line);
1379           assert(0);
1380         }
1381         if(parsedTag.attributes.data != expectedTag.attributes.data) {
1382           writefln("Error: expected tag attributes '%s' but got '%s'", expectedTag.attributes.data, parsedTag.attributes.data);
1383           writefln("Error: test on line %s", line);
1384           assert(0);
1385         }
1386
1387         // put the tag into the buffer2 sink to reparse again after
1388         static if(reparse) {
1389           parsedTag.toSdl(&buffer2Sink);
1390           previousDepth = parsedTag.depth;
1391           if(parsedTag.hasOpenBrace) previousDepth++; // children of this tag are one level deeper
1392         }
1393       }
1394       // every expected tag was seen, so the parser must now report that there are no more tags
1395       if(parseSdlTag(&parsedTag, &next)) {
1396         writefln("Expected %s tag(s) but got at least one more (depth=%s, name='%s')",
1397                  expectedTags.length, parsedTag.depth, parsedTag.name);
1398         writefln("Error: test on line %s", line);
1399         assert(0);
1400       }
1401
1402     } catch(SdlParseException e) {
1403       writefln("[TEST] this sdl threw an unexpected SdlParseException: '%s'", escape(sdlText));
1404       writeln(e);
1405       writefln("Error: test on line %s", line);
1406       assert(0);
1407     } catch(Exception e) {
1408       writefln("[TEST] this sdl threw an unexpected Exception: '%s'", escape(sdlText));
1409       writeln(e);
1410       writefln("Error: test on line %s", line);
1411       assert(0);
1412     }
1413
1414     static if(reparse) {
1415       if(previousDepth != size_t.max) {
1416         while(previousDepth > parsedTag.depth) { // close whatever braces are still open
1417           buffer2Sink.put("}");
1418           previousDepth--;
1419         }
1420       }
1421       // rerun (without another regeneration round) only if the regenerated text is non-empty and new
1422       if(buffer2Sink.slice != sdlText &&
1423          (buffer2Sink.slice.length && buffer2Sink.slice[0..$-1] != sdlText)) {
1424         testParseSdl!false(false, buffer2Sink.slice, expectedTags, line);
1425       }
1426     }
1427
1428   }
1429 
1430   void testInvalidSdl(bool copySdl, const(char)[] sdlText, SdlErrorType expectedErrorType = SdlErrorType.unknown, size_t line = __LINE__) {
1431     auto escapedSdlText = escape(sdlText);
1432     debug writefln("[TEST] testing invalid sdl '%s'", escapedSdlText);
1433     // Asserts that parsing sdlText fails with an SdlParseException or a Utf8Exception.
1434     SdlErrorType actualErrorType = SdlErrorType.unknown;
1435     // expectedErrorType == SdlErrorType.unknown accepts any error type.
1436     char[] next = setupSdlText(sdlText, copySdl);
1437
1438     parsedTag.resetForNewSdl();
1439     try {
1440       while(parseSdlTag(&parsedTag, &next)) { } // drain every tag; reaching the end means no error was raised
1441       writefln("Error: invalid sdl was successfully parsed: %s", sdlText);
1442       writefln("Error: test was on line %s", line);
1443       assert(0);
1444     } catch(SdlParseException e) {
1445       debug writefln("[TEST]    got expected error: %s", e.msg);
1446       actualErrorType = e.type;
1447     } catch(Utf8Exception e) { // leaves actualErrorType at SdlErrorType.unknown
1448       debug writefln("[TEST]    got expected error: %s", e.msg);
1449     }
1450
1451     if(expectedErrorType != SdlErrorType.unknown &&
1452        expectedErrorType != actualErrorType) {
1453       writefln("Error: expected error '%s' but got error '%s'", expectedErrorType, actualErrorType);
1454       writefln("Error: test was on line %s", line);
1455       assert(0);
1456     }
1457
1458   }
1459 
1460   testParseSdl(false, "");
1461   testParseSdl(false, "  ");
1462   testParseSdl(false, "\n");
1463 
1464   testParseSdl(false, "#Comment");
1465   testParseSdl(false, "#Comment copyright \u00a8");
1466   testParseSdl(false, "#Comment\n");
1467   testParseSdl(false, "#Comment\r\n");
1468   testParseSdl(false, "  #   Comment\r\n");
1469 
1470   testParseSdl(false, "  --   Comment\n");
1471   testParseSdl(false, " ------   Comment\n");
1472 
1473   testParseSdl(false, "  #   Comment1 \r\n  -- Comment 2");
1474 
1475 
1476   testParseSdl(false, " //   Comment\n");
1477   testParseSdl(false, " ////   Comment\n");
1478 
1479   testParseSdl(false, "/* a multiline comment \n\r\n\n\n\t hello stuff # -- // */");
1480 
1481   // TODO: test this using the allowBracesAfterNewline option
1482   //  testParseSdl(false, "tag /*\n\n*/{ child }", Tag("tag"), Tag("child"));
1483 
1484 
1485   testParseSdl(false, "a", [Tag("a")]);
1486   testParseSdl(false, "ab", [Tag("ab")]);
1487   testParseSdl(false, "abc", [Tag("abc")]);
1488   testParseSdl(false, "firsttag", [Tag("firsttag")]);
1489   testParseSdl(false, "funky._-$tag", [Tag("funky._-$tag")]);
1490 
1491 
1492   {
1493     auto prefixes = ["", " ", "\t", "--comment\n"];
1494     foreach(prefix; prefixes) {
1495       testInvalidSdl(false, prefix~":");
1496     }
1497   }
1498 
1499 
1500   auto validCharactersAfterTag =
1501     [" ", "\t", "\n", "\v", "\f", "\r", ";", "//", "\\", "#", "{}"];
1502 
1503 
1504   auto namespaces = ["a:", "ab:", "abc:"];
1505   bool isProposedSdl = false;
1506   while(true) {
1507     string tagName;
1508     if(isProposedSdl) {
1509       tagName = null;
1510       useProposed();
1511     } else {
1512       tagName = "content";
1513     }
1514     foreach(namespace; namespaces) {
1515       testParseSdl(false, namespace, [Tag(namespace~tagName)]);
1516       foreach(suffix; validCharactersAfterTag) {
1517         testParseSdl(false, namespace~suffix, [Tag(namespace~tagName)]);
1518       }
1519       testParseSdl(false, "tag1{"~namespace~"}", [Tag("tag1"), Tag(namespace~tagName)]);
1520     }
1521     if(isProposedSdl) break;
1522     isProposedSdl = true;
1523   }
1524   useStrict();
1525 
1526 
1527   testParseSdl(false, "a:a", [Tag("a:a")]);
1528   testParseSdl(false, "ab:a", [Tag("ab:a")]);
1529 
1530   testParseSdl(false, "a:ab", [Tag("a:ab")]);
1531   testParseSdl(false, "ab:ab", [Tag("ab:ab")]);
1532 
1533   testParseSdl(false, "html:table", [Tag("html:table")]);
1534 
1535   testParseSdl(false, ";", [Tag("content")]);
1536   testParseSdl(false, "myid;", [Tag("myid")]);
1537   testParseSdl(false, "myid;   ", [Tag("myid")]);
1538   testParseSdl(false, "myid #comment", [Tag("myid")]);
1539   testParseSdl(false, "myid # comment \n", [Tag("myid")]);
1540   testParseSdl(false, "myid -- comment \n # more comments\n", [Tag("myid")]);
1541 
1542 
1543   testParseSdl(false, "myid /* multiline comment */", [Tag("myid")]);
1544   testParseSdl(false, "myid /* multiline comment */ ", [Tag("myid")]);
1545   testParseSdl(false, "myid /* multiline comment */\n", [Tag("myid")]);
1546   testParseSdl(false, "myid /* multiline comment \n\n */", [Tag("myid")]);
1547   testParseSdl(false, "myid /* multiline comment **/ \"value\"", [Tag("myid", `"value"`)]);
1548   testParseSdl(false, "myid /* multiline comment \n\n */another-id", [Tag("myid"), Tag("another-id")]);
1549   testParseSdl(false, "myid /* multiline comment */ \"value\"", [Tag("myid", `"value"`)]);
1550   testParseSdl(false, "myid /* multiline comment \n */ \"value\"", [Tag("myid"), Tag("content", `"value"`)]);
1551   testInvalidSdl(false, "myid /* multiline comment \n */ { \n }");
1552   useProposed();
1553   testParseSdl(false, "myid /* multiline comment */ { \n }", [Tag("myid")]);
1554   testParseSdl(false, "myid /* multiline comment \n */ \"value\"", [Tag("myid"), Tag(null, `"value"`)]);
1555   useStrict();
1556 
1557 
1558   testParseSdl(false, "tag1\ntag2", [Tag("tag1"), Tag("tag2")]);
1559   testParseSdl(false, "tag1;tag2\ntag3", [Tag("tag1"), Tag("tag2"), Tag("tag3")]);
1560 
1561   testInvalidSdl(false, "myid {");
1562   testInvalidSdl(false, "myid {\n\n");
1563 
1564   testInvalidSdl(false, "{}");
1565 
1566   testParseSdl(false, "tag1{}", [Tag("tag1")]);
1567   testParseSdl(false, "tag1{}tag2", [Tag("tag1"), Tag("tag2")]);
1568   testParseSdl(false, "tag1{}\ntag2", [Tag("tag1"), Tag("tag2")]);
1569 
1570   testParseSdl(false, "tag1{tag1.1}tag2", [Tag("tag1"), Tag("tag1.1"), Tag("tag2")]);
1571 
1572   //
1573   // Handling the backslash '\' character
1574   //
1575   testInvalidSdl(false, "\\"); // slash must in the context of a tag
1576   testInvalidSdl(false, `tag \ x`);
1577 
1578   testParseSdl(false, "tag\\", [Tag("tag")]); // Make sure this is valid sdl
1579   testParseSdl(false, "tag \\  \n \\ \n \"hello\"", [Tag("tag", `"hello"`)]);
1580 
1581   //
1582   // Test the keywords (white box tests trying to attain full code coverage)
1583   //
1584   auto keywords = ["null", "true", "false", "on", "off"];
1585 
1586   foreach(keyword; keywords) {
1587     testParseSdl(false, keyword, [Tag("content", keyword)]);
1588   }
1589 
1590   namespaces = ["", "n:", "namespace:"];
1591   foreach(namespace; namespaces) {
1592     sdlBuffer[0..namespace.length] = namespace;
1593     auto afterTagName = namespace.length + 4;
1594     sdlBuffer[namespace.length..afterTagName] = "tag ";
1595     string expectedTagName = namespace~"tag";
1596 
1597     foreach(keyword; keywords) {
1598       for(auto cutoff = 1; cutoff < keyword.length; cutoff++) {
1599         sdlBuffer[afterTagName..afterTagName+cutoff] = keyword[0..cutoff];
1600         //testInvalidSdl(false, sdlBuffer[0..afterTagName+cutoff]);
1601         testParseSdl(false, sdlBuffer[0..afterTagName+cutoff], [Tag(expectedTagName, sdlBuffer[afterTagName..afterTagName+cutoff])]);
1602       }
1603     }
1604     auto suffixes = [";", " \t;", "\n", "{}", " \t {\n }"];
1605     foreach(keyword; keywords) {
1606       auto limit = afterTagName+keyword.length;
1607 
1608       sdlBuffer[afterTagName..limit] = keyword;
1609       testParseSdl(false, sdlBuffer[0..limit], [Tag(expectedTagName, keyword)]);
1610 
1611       foreach(suffix; suffixes) {
1612         sdlBuffer[limit..limit+suffix.length] = suffix;
1613         testParseSdl(false, sdlBuffer[0..limit+suffix.length], [Tag(expectedTagName, keyword)]);
1614       }
1615     }
1616     foreach(keyword; keywords) {
1617 
1618       foreach(attrNamespace; namespaces) {
1619 
1620         for(auto cutoff = 1; cutoff <= keyword.length; cutoff++) {
1621           auto limit = afterTagName + attrNamespace.length;
1622           sdlBuffer[afterTagName..limit] = attrNamespace;
1623           limit += cutoff;
1624           sdlBuffer[limit - cutoff..limit] = keyword[0..cutoff];
1625           sdlBuffer[limit..limit+8] = `="value"`;
1626           testParseSdl(false, sdlBuffer[0..limit+8], [Tag(expectedTagName, format(`%s%s="value"`, attrNamespace, keyword[0..cutoff]))]);
1627 
1628           foreach(otherKeyword; keywords) {
1629             sdlBuffer[limit+1..limit+1+otherKeyword.length] = otherKeyword;
1630             testParseSdl(false, sdlBuffer[0..limit+1+otherKeyword.length],
1631                          [Tag(expectedTagName, format("%s%s=%s", attrNamespace, keyword[0..cutoff], otherKeyword))]);
1632           }
1633         }
1634 
1635       }
1636 
1637     }
1638   }
1639 
1640 
1641 
1642 
1643   //
1644   // String Literals
1645   //
1646   testParseSdl(false, `a "apple"`, [Tag("a", `"apple"`)]);
1647   testParseSdl(false, "a \"pear\"\n", [Tag("a", `"pear"`)]);
1648   testParseSdl(false, "a \"left\"\nb \"right\"", [Tag("a", `"left"`), Tag("b", `"right"`)]);
1649   testParseSdl(false, "a \"cat\"\"dog\"\"bear\"\n", [Tag("a", `"cat"`, `"dog"`, `"bear"`)]);
1650   testParseSdl(false, "a \"tree\";b \"truck\"\n", [Tag("a", `"tree"`), Tag("b", `"truck"`)]);
1651 
1652 
1653   //
1654   // Unquoted Strings
1655   //
1656   testParseSdl(false, "tag string", [Tag("tag", "string")]);
1657   testParseSdl(false, "tag attr=string", [Tag("tag", "attr=string")]);
1658 
1659 
1660 
1661   //
1662   // Attributes
1663   //
1664   testParseSdl(false, "tag attr=null", [Tag("tag", "attr=null")]);
1665   testParseSdl(false, "tag \"val\" attr=null", [Tag("tag", `"val"`, "attr=null")]);
1666 
1667   auto mixedValuesAndAttributesTests =
1668     [
1669      SdlTest("tag attr=null \"val\"", [Tag("tag", "attr=null", `"val"`)] )
1670      ];
1671 
1672   foreach(test; mixedValuesAndAttributesTests) {
1673     testInvalidSdl(test.copySdl, test.sdlText, SdlErrorType.mixedValuesAndAttributes);
1674   }
1675   useProposed();
1676   foreach(test; mixedValuesAndAttributesTests) {
1677     testParseSdl(test.copySdl, test.sdlText, test.expectedTags);
1678   }
1679   useStrict();
1680 
1681   foreach(suffix; validCharactersAfterTag) {
1682     testParseSdl(false, "tag attr=null"~suffix, [Tag("tag", "attr=null")]);
1683     testParseSdl(false, "tag attr=true"~suffix, [Tag("tag", "attr=true")]);
1684     testParseSdl(false, "tag attr=unquoted"~suffix, [Tag("tag", "attr=unquoted")]);
1685     testParseSdl(false, "tag attr=\"quoted\""~suffix, [Tag("tag", "attr=\"quoted\"")]);
1686     testParseSdl(false, "tag attr=1234"~suffix, [Tag("tag", "attr=1234")]);
1687 
1688     testParseSdl(false, "attr=null"~suffix, [Tag("content", "attr=null")]);
1689     testParseSdl(false, "attr=true"~suffix, [Tag("content", "attr=true")]);
1690     testParseSdl(false, "attr=unquoted"~suffix, [Tag("content", "attr=unquoted")]);
1691     testParseSdl(false, "attr=\"quoted\""~suffix, [Tag("content", "attr=\"quoted\"")]);
1692     testParseSdl(false, "attr=1234"~suffix, [Tag("content", "attr=1234")]);
1693   }
1694 
1695   //
1696   // Test parsing numbers without extracting them
1697   //
1698   enum numberPostfixes = ["", "l", "L", "f", "F", "d", "D", "bd", "BD"];
1699   {
1700     enum sdlPostfixes = ["", " ", ";", "\n"];
1701 
1702     auto numbers = ["0", "12", "9876", "5432", /*".1",*/ "0.1", "12.4", /*"1.",*/ "8.04",  "123.l"];
1703 
1704 
1705     for(size_t negative = 0; negative < 2; negative++) {
1706       string prefix = negative ? "-" : "";
1707 
1708       foreach(postfix; numberPostfixes) {
1709         foreach(number; numbers) {
1710 
1711           auto testNumber = prefix~number~postfix;
1712 
1713           if(postfix.length) {
1714             useProposed();
1715             //testInvalidSdl(false, "tag "~testNumber);
1716             useStrict();
1717           }
1718           //testInvalidSdl(false, "tag "~testNumber~"=");
1719 
1720           foreach(sdlPostfix; sdlPostfixes) {
1721             testParseSdl(false, "tag "~testNumber~sdlPostfix, [Tag("tag", testNumber)]);
1722           }
1723         }
1724       }
1725 
1726 
1727     }
1728   }
1729 
1730   //
1731   // Test parsing numbers and extracting them
1732   //
1733   {
1734     for(size_t negative = 0; negative < 2; negative++) {
1735       string prefix = negative ? "-" : "";
1736 
1737       foreach(postfix; numberPostfixes) {
1738 
1739         void testNumber(Types...)(ulong expectedValue) {
1740           long expectedSignedValue = negative ? -1 * (cast(long)expectedValue) : cast(long)expectedValue;
1741 
1742           foreach(Type; Types) {
1743             if(negative && isUnsigned!Type) continue;
1744             if(expectedSignedValue > Type.max) continue;
1745             static if( is(Type == float) || is(Type == double) || is(Type == real)) {
1746               if(expectedSignedValue < Type.min_normal) continue;
1747             } else {
1748               if(expectedSignedValue < Type.min) continue;
1749             }
1750 
1751             debug writefln("[DEBUG] testing %s on %s", typeid(Type), parsedTag.values.data[0]);
1752             Type t;
1753             parsedTag.getOneValue(t);
1754             assert(t == cast(Type) expectedSignedValue, format("Expected (%s) %s but got %s", typeid(Type), expectedSignedValue, t));
1755           }
1756         }
1757         void testDecimalNumber(Types...)(real expectedValue) {
1758           foreach(Type; Types) {
1759             if(negative && isUnsigned!Type) continue;
1760             if(expectedValue > Type.max) continue;
1761             static if( is(Type == float) || is(Type == double) || is(Type == real)) {
1762               if(expectedValue < Type.min_normal) continue;
1763             } else {
1764               if(expectedValue < Type.min) continue;
1765             }
1766 
1767             debug writefln("[DEBUG] testing %s on %s", typeid(Type), parsedTag.values.data[0]);
1768             Type t;
1769             parsedTag.getOneValue(t);
1770             assert(t - cast(Type) expectedValue < .01, format("Expected (%s) %s but got %s", typeid(Type), cast(Type)expectedValue, t));
1771           }
1772         }
1773 
1774         alias testNumber!(byte,ubyte,short,ushort,int,uint,long,ulong,float,double,real) testNumberOnAllTypes;
1775         alias testDecimalNumber!(float,double,real) testDecimalNumberOnAllTypes;
1776 
1777         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"0"~postfix);
1778         testNumberOnAllTypes(0);
1779 
1780         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"1"~postfix);
1781         testNumberOnAllTypes(1);
1782 
1783         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"12"~postfix);
1784         testNumberOnAllTypes(12);
1785 
1786         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"9987"~postfix);
1787         testNumberOnAllTypes(9987);
1788 
1789         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"0.0"~postfix);
1790         testDecimalNumberOnAllTypes(0.0);
1791 
1792         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~".1"~postfix);
1793         testDecimalNumberOnAllTypes(0.1);
1794 
1795         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~".000001"~postfix);
1796         testDecimalNumberOnAllTypes(0.000001);
1797 
1798         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"100384.999"~postfix);
1799         testDecimalNumberOnAllTypes(100384.999);
1800 
1801         parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"3.14159265"~postfix);
1802         testDecimalNumberOnAllTypes(3.14159265);
1803       }
1804     }
1805 
1806   }
1807 
1808 
1809   //
1810   // Children
1811   //
1812   testInvalidSdl(false, "{}"); // no line can start with a curly brace
1813 
1814   auto braceAfterNewlineTests =
1815     [
1816      SdlTest("tag\n{  child\n}", [Tag("tag"), Tag("child")]),
1817      SdlTest("colors \"hot\" \n{  yellow\n}", [Tag("colors", `"hot"`), Tag("yellow")])
1818      ];
1819 
1820   foreach(test; braceAfterNewlineTests) {
1821     testInvalidSdl(test.copySdl, test.sdlText, SdlErrorType.braceAfterNewline);
1822   }
1823   useProposed();
1824   foreach(test; braceAfterNewlineTests) {
1825     testParseSdl(test.copySdl, test.sdlText, test.expectedTags);
1826   }
1827   useStrict();
1828 
1829   //
1830   // Odd corner cases
1831   //
1832   testParseSdl(false, "tag null;", [Tag("tag", "null")]);
1833   testParseSdl(false, "tag null{}", [Tag("tag", "null")]);
1834   testParseSdl(false, "tag true;", [Tag("tag", "null")]);
1835   testParseSdl(false, "tag true{}", [Tag("tag", "null")]);
1836   testParseSdl(false, "tag false;", [Tag("tag", "null")]);
1837   testParseSdl(false, "tag false{}", [Tag("tag", "null")]);
1838 
1839   testParseSdl(false, "namespace:true", [Tag("namespace:true")]);
1840   testParseSdl(false, ":true", [Tag("true")]);
1841   testParseSdl(false, "true", [Tag("content", "true")]);
1842   testParseSdl(false, "tag\\", [Tag("tag")]);
1843   testParseSdl(false, "tag/*comment*/null", [Tag("tag", "null")]);
1844   testParseSdl(false, "crazy--tag", [Tag("crazy--tag")]);
1845   testParseSdl(false, "tag# comment", [Tag("tag")]);
1846   testParseSdl(false, "tag// comment", [Tag("tag")]);
1847   testParseSdl(false, "a=what", [Tag("content", "a=what")]);
1848 
1849   testParseSdl(false, "tag {\nattr=value//\n}", [Tag("tag"), Tag("content", "attr=value")]);
1850   testParseSdl(false, "tag {\nattr=value}", [Tag("tag"), Tag("content", "attr=value")]);
1851 
1852 
1853   testInvalidSdl(false, "tag what/huh");
1854   testInvalidSdl(false, `tag"value"`);
1855   testInvalidSdl(false, "attr:123");
1856   testInvalidSdl(false, "attr:\"what\"");
1857   testInvalidSdl(false, "attr:=what");
1858   testInvalidSdl(false, "attr:=null");
1859   testInvalidSdl(false, "attr:=345");
1860   testInvalidSdl(false, "name:tag:weird");
1861   testInvalidSdl(false, "tag namespace:what");
1862   testInvalidSdl(false, "tag namespace:\"value\"");
1863   testInvalidSdl(false, "tag namespace:null");
1864   testInvalidSdl(false, "tag namespace:true");
1865   testInvalidSdl(false, "tag^");
1866   testInvalidSdl(false, "tag<");
1867   testInvalidSdl(false, "tag>");
1868 
1869 
1870 
1871 
1872 
1873   // TODO: testing using all keywords as namespaces true:id, etc.
1874   testParseSdl(false, "tag null:null=\"value\";", [Tag("tag", "null:null=\"value\"")]);
1875   testParseSdl(false, "null", [Tag("content", "null")]);
1876 
1877 
1878 
1879   //
1880   // Full Parses
1881   //
1882   testParseSdl(false, `
1883 name "joe"
1884 children {
1885   name "jim"
1886 }`, [Tag("name", `"joe"`), Tag("children"), Tag("name", `"jim"`)]);
1887 
1888   testParseSdl(false, `
1889 parent name="jim" {
1890   child "hasToys" name="joey" {
1891      # just a comment here for now
1892   }
1893 }`, [Tag("parent", "name=\"jim\""), Tag("child", "name=\"joey\"", `"hasToys"`)]);
1894 
1895 
1896   testParseSdl(false,`td 34
1897 html:td "Puggy"
1898 `, [Tag("td", `34`),
1899     Tag("html:td", `"Puggy"`)]);
1900 
1901 
1902   testParseSdl(false,`html:table {
1903   html:tr {
1904     html:th "Name"
1905     html:th "Age"
1906     html:th "Pet"
1907   }
1908   html:tr {
1909     html:td "Brian"
1910     html:td 34
1911     html:td "Puggy"
1912     html:td null
1913     html:td false
1914   }
1915   tr {
1916     td "Jackie"
1917     td 27
1918     td null
1919   }
1920 }`, [Tag("html:table"),
1921       Tag("html:tr"),
1922         Tag("html:th", `"Name"`),
1923         Tag("html:th", `"Age"`),
1924         Tag("html:th", `"Pet"`),
1925       Tag("html:tr"),
1926         Tag("html:td", `"Brian"`),
1927         Tag("html:td", `34`),
1928         Tag("html:td", `"Puggy"`),
1929         Tag("html:td", `null`),
1930         Tag("html:td", `false`),
1931       Tag("tr"),
1932         Tag("td", `"Jackie"`),
1933         Tag("td", `27`),
1934         Tag("td", `null`)]);
1935 }
1936 
/// Assists in walking an SDL tree which supports the StAX method of parsing.
///
/// The walker repeatedly calls $(D parseSdlTag) on the remaining text and
/// stores every parsed tag in the caller-supplied $(D Tag).  Nested tags are
/// visited by calling $(D pop) with the depth returned by $(D childrenDepth).
///
/// Examples:
/// ---
/// Tag tag;
/// SdlWalker walker = SdlWalker(&tag, sdl);
/// while(walker.pop()) {
///     // use tag to process the current tag
///
///     auto depth = walker.childrenDepth();
///     while(walker.pop(depth)) {
///         // process tag again as a child tag
///     }
///
/// }
/// ---
struct SdlWalker
{
  /// A pointer to the tag structure that will be populated after parsing every tag.
  Tag* tag;

  // The sdl text that has yet to be parsed.
  private char[] sdl;

  // Set when a pop() made for a deeper level parsed a tag that belongs to a
  // shallower level.  The tag is left in *tag and is handed out by the next
  // pop() whose depth matches it, instead of parsing more text.
  bool tagAlreadyPopped;

  /// Creates a walker that parses sdl and writes every parsed tag into *tag.
  this(Tag* tag, char[] sdl) {
    this.tag = tag;
    this.sdl = sdl;
  }

  /// Parses the next tag at the given depth.
  /// Params:
  ///   depth = the depth the next tag is expected to have (0 for top-level tags)
  ///   allowSkipChildren = when true, tags deeper than depth are silently skipped
  /// Returns: true if it parsed a tag at the given depth and false if there are no more
  ///          tags at the given depth. If depth is 0 it means the sdl has been fully parsed.
  /// Throws: Exception if the current tag has children and they were not parsed
  ///         and allowSkipChildren is set to false.
  bool pop(size_t depth = 0, bool allowSkipChildren = false) {
    if(tagAlreadyPopped) {
      if(depth < tag.depth) throw new Exception("possible code bug here?");

      // The pending tag belongs to an ancestor of the requested level, so
      // this level is finished.  Nothing may be parsed here: doing so would
      // discard the pending tag and attribute the following tags to the
      // wrong parent.
      if(depth > tag.depth) return false;

      // The pending tag is exactly at the requested level: hand it out.
      tagAlreadyPopped = false;
      return true;
    }

    while(true) {
      size_t previousDepth;
      const(char)[] previousName;

      // Remember the tag being left so the error below can name it.
      if(!allowSkipChildren) {
        previousDepth = tag.depth;
        previousName = tag.name;
      }

      if(!parseSdlTag(this.tag, &sdl)) {
        assert(tag.depth == 0, format("code bug: parseSdlTag returned end of input but tag.depth was %s (not 0)", tag.depth));
        return false;
      }

      if(this.tag.depth == depth) return true;

      // Check if it is the end of this set of children
      if(this.tag.depth < depth) {
        tagAlreadyPopped = true;
        return false;
      }

      // The parsed tag is deeper than requested, i.e. it is an unvisited child.
      if(!allowSkipChildren) throw new Exception(format("forgot to call children on tag '%s' at depth %s", previousName, previousDepth));
    }
  }

  /// Returns: the depth to pass to pop() in order to walk the children of the current tag.
  public size_t childrenDepth() { return tag.depth + 1; }
}
2010 
version(unittest)
{
  // A named dependency together with its version string.
  struct Dependency {
    string name;
    string version_;
  }

  // Example of parsing a configuration file.
  // The field order is relied upon by positional struct literals in the tests.
  struct Package {
    const(char)[] name;
    const(char)[] description;

    const(char)[][] authors;
    auto dependencies = appender!(Dependency[])();
    auto subPackages = appender!(Package[])();

    // Puts every field back into its empty state.
    void reset() {
      name = null;
      description = null;
      authors = null;
      dependencies.clear();
      subPackages.clear();
    }

    // Field-by-field comparison; the appenders are compared by their contents.
    bool opEquals(ref const Package p) {
      if(name != p.name) return false;
      if(description != p.description) return false;
      if(authors != p.authors) return false;
      if(dependencies.data != p.dependencies.data) return false;
      return subPackages.data == p.subPackages.data;
    }

    // Convenience overload: prepares the text with setupSdlText and parses the result.
    void parseSdlPackage(bool copySdl, string sdlText) {
      auto preparedText = setupSdlText(sdlText, copySdl);
      parseSdlPackage(preparedText);
    }

    // Walks the top-level tags of sdlText and stores the 'name',
    // 'description' and 'authors' tags in the matching fields.  Any other
    // tag is reported through tag.throwIsUnknown().
    void parseSdlPackage(char[] sdlText) {
      Tag tag;
      auto walker = SdlWalker(&tag, sdlText);

      // Shared handling for tags that carry exactly one value.
      void readSingleValue(ref const(char)[] target) {
        tag.enforceNoAttributes();
        tag.enforceNoChildren();
        tag.getOneValue(target);
      }

      while(walker.pop()) {

        debug writefln("[sdl] (depth %s) tag '%s'%s", tag.depth, tag.name,
                       tag.hasOpenBrace ? "(has children)" : "");

        if(tag.name == "name") {
          readSingleValue(this.name);
          continue;
        }

        if(tag.name == "description") {
          readSingleValue(this.description);
          continue;
        }

        if(tag.name == "authors") {
          // only the authors tag is checked for duplicates
          if(this.authors !is null) tag.throwIsDuplicate();
          tag.enforceNoAttributes();
          tag.enforceNoChildren();
          tag.getValues(this.authors);
          continue;
        }

        tag.throwIsUnknown();
      }
    }
  }
}
2078 
2079 
// Parses a small package description with SdlWalker (through
// Package.parseSdlPackage) and checks the resulting fields.
unittest
{
  mixin(scopedTest!"SdlWalker");

  // Parses sdlText into a fresh Package and fails the test, after printing
  // both packages, if the result differs from expectedPackage.
  void testPackage(bool copySdl, string sdlText, ref Package expectedPackage)
  {
    Package parsedPackage;

    parsedPackage.parseSdlPackage(copySdl, sdlText);

    if(expectedPackage != parsedPackage) {
      writefln("Expected package: %s", expectedPackage);
      writefln(" but got package: %s", parsedPackage);
      assert(0);
    }
  }

  // testPackage takes its expectation by ref, so it has to be an lvalue.
  Package expectedPackage = Package("my-package", "an example sdl package",
                                    ["Jonathan", "David", "Amy"]);

  testPackage(false, `
name        "my-package"
description "an example sdl package"
authors     "Jonathan" "David" "Amy"
`, expectedPackage);
}
2109 
// Walks a nested SDL document with SdlWalker, building a tree of Person
// values from 'person' tags and their 'child' tags.
unittest
{
  mixin(scopedTest!"SdlWalkerOnPerson");

  // The field order matters: the expected values at the bottom of this test
  // are built with positional struct literals.
  struct Person {
    const(char)[] name;
    ushort age;
    const(char)[][] nicknames;
    Person[] children;

    // Puts every field back into its empty state.
    void reset() {
      name = null;
      age = 0;
      nicknames = null;
      children.length = 0;
    }

    // Field-by-field comparison, children included.
    bool opEquals(ref const Person p) {
      if(name != p.name) return false;
      if(age != p.age) return false;
      if(nicknames != p.nicknames) return false;
      return children == p.children;
    }

    string toString() {
      return format("Person(\"%s\", %s, %s, %s)", name, age, nicknames, children);
    }

    // A person needs both a name and a non-zero age.
    void validate() {
      if(name is null) throw new Exception("person is missing the 'name' tag");
      if(age == 0) throw new Exception("person is missing the 'age' tag");
    }

    // Fills this person from the children of the walker's current tag.
    // 'child' tags are parsed recursively with the same walker.
    void parseFromSdl(ref SdlWalker walker) {
      auto current = walker.tag;

      current.enforceNoValues();
      current.enforceNoAttributes();

      reset();

      auto kids = appender!(Person[])();

      // Checks shared by the tags that only carry values.
      void enforceValuesOnly() {
        current.enforceNoAttributes();
        current.enforceNoChildren();
      }

      // The child depth is computed once, before the first child is popped.
      for(immutable childDepth = walker.childrenDepth(); walker.pop(childDepth); ) {

        if(current.name == "name") {
          enforceValuesOnly();
          current.getOneValue(name);
          continue;
        }

        if(current.name == "age") {
          enforceValuesOnly();
          current.getOneValue(age);
          continue;
        }

        if(current.name == "nicknames") {
          enforceValuesOnly();
          current.getValues(nicknames);
          continue;
        }

        if(current.name == "child") {
          Person kid = Person();
          kid.parseFromSdl(walker);
          kids.put(kid);
          continue;
        }

        current.throwIsUnknown();
      }

      // Keep a private copy of the collected children.
      this.children = kids.data.dup;
      kids.clear();
      validate();
    }
  }

  // Parses every top-level 'person' tag of the text into a Person.
  Appender!(Person[]) parsePeople(char[] sdl) {
    auto result = appender!(Person[])();
    Person scratch; // reused for every tag; parseFromSdl resets it first

    Tag tag;
    auto walker = SdlWalker(&tag, sdl);
    while(walker.pop()) {
      if(tag.name != "person") {
        tag.throwIsUnknown();
        continue;
      }

      scratch.parseFromSdl(walker);
      result.put(scratch);
    }

    return result;
  }

  // Parses sdlText and fails the test unless the result equals expectedPeople.
  void testParsePeople(bool copySdl, string sdlText, Person[] expectedPeople...)
  {
    Appender!(Person[]) parsedPeople;

    // Prints both lists and fails the test.
    void failWithMismatch() {
      writefln("Expected: %s", expectedPeople);
      writefln(" but got: %s", parsedPeople.data);
      assert(0);
    }

    try {

      parsedPeople = parsePeople(setupSdlText(sdlText, copySdl));

    } catch(Exception e) {
      writefln("the following sdl threw an unexpected exception: %s", sdlText);
      writeln(e);
      assert(0);
    }

    if(expectedPeople.length != parsedPeople.data.length) failWithMismatch();

    foreach(index, expectedPerson; expectedPeople) {
      if(expectedPerson != parsedPeople.data[index]) failWithMismatch();
    }
  }

  testParsePeople(false, `
person {
    name "Robert"
    age 29
    nicknames "Bob" "Bobby"
    child {
        name "Jack"
        age 6
        nicknames "Little Jack"
    }
    child {
        name "Sally"
        age 8
    }
}`, Person("Robert", 29, ["Bob", "Bobby"], [Person("Jack", 6, ["Little Jack"]),Person("Sally", 8)]));

}
2254