1 /** 2 $(P An SDL ($(LINK2 http://www.ikayzo.org/display/SDL/Home,Simple Declarative Language)) parser. 3 Supports StAX/SAX style API. See $(D more.std.dom) for DOM style API.) 4 5 Examples: 6 -------------------------------- 7 void printTags(char[] sdl) { 8 Tag tag; 9 while(parseSdlTag(&tag, &sdl)) { 10 writefln("(depth %s) tag '%s' values '%s' attributes '%s'", 11 tag.depth, tag.name, tag.values.data, tag.attributes.data); 12 } 13 } 14 15 struct Person { 16 string name; 17 ushort age; 18 string[] nicknames; 19 auto children = appender!(Person[])(); 20 void reset() { 21 name = null; 22 age = 0; 23 nicknames = null; 24 children.clear(); 25 } 26 void parseFromSdl(ref SdlWalker walker) { 27 tag.enforceNoValues(); 28 tag.enforceNoAttributes(); 29 reset(); 30 foreach(auto personWalker = walker.children(); 31 !personWalker.empty; personWalker.popFront) { 32 33 if(tag.name == "name") { 34 35 tag.enforceNoAttributes(); 36 tag.enforceNoChildren(); 37 tag.getOneValue(name); 38 39 } else if(tag.name == "age") { 40 41 tag.enforceNoAttributes(); 42 tag.enforceNoChildren(); 43 tag.getOneValue(age); 44 45 } else if(tag.name == "nicknames") { 46 47 tag.enforceNoAttributes(); 48 tag.enforceNoChildren(); 49 tag.getValues(nicknames); 50 51 } else if(tag.name == "child") { 52 53 54 55 // todo implement 56 57 } else tag.throwIsUnknown(); 58 } 59 } 60 void validate() { 61 if(name == null) throw new Exception("person is missing the 'name' tag"); 62 if(age == 0) throw new Exception("person is missing the 'age' tag"); 63 } 64 } 65 66 void parseTags(char[] sdl) { 67 struct Person { 68 string name; 69 ushort age; 70 string[] nicknames; 71 Person[] children; 72 void reset() { 73 name = null; 74 age = 0; 75 nicknames = null; 76 children = null; 77 } 78 void validate() { 79 if(name == null) throw new Exception("person is missing the 'name' tag"); 80 if(age == 0) throw new Exception("person is missing the 'age' tag"); 81 } 82 } 83 auto people = appender(Person[])(); 84 Person person; 85 86 Tag tag; 87 for(auto walker = SdlWalker(&tag, sdl); !walker.empty; walker.popFront) { 88 if(tag.name == "person") { 89 90 tag.enforceNoValues(); 91 tag.enforceNoAttributes(); 92 person.reset(); 93 person.validate(); 94 people.put(person); 95 96 } else tag.throwIsUnknown(); 97 } 98 } 99 100 -------------------------------- 101 TODO: implement escaped strings 102 TODO: finish unit tests 103 TODO: write a input-range sdl parser 104 TODO: implement datetime/timespans 105 106 Authors: Jonathan Marler, johnnymarler@gmail.com 107 License: use freely for any purpose 108 */ 109 110 module more.sdl; 111 112 import core.stdc..string : memmove; 113 114 import std.array; 115 import std..string; 116 import std.range; 117 import std.conv; 118 import std.bitmanip; 119 import std.traits; 120 121 import more.common; 122 import more.utf8; 123 124 version(unittest) 125 { 126 import std.stdio; 127 } 128 129 alias LineNumber = size_t; 130 131 /// Used in SdlParseException to distinguish specific sdl parse errors. 132 enum SdlErrorType { 133 unknown, 134 braceAfterNewline, 135 mixedValuesAndAttributes, 136 } 137 /// Thrown by sdl parse functions when invalid SDL is encountered. 138 class SdlParseException : Exception 139 { 140 SdlErrorType type; 141 LineNumber lineInSdl; 142 this(LineNumber lineInSdl, string msg, string file = __FILE__, size_t codeLine = __LINE__) { 143 this(SdlErrorType.unknown, lineInSdl, msg, file, codeLine); 144 } 145 this(SdlErrorType errorType, LineNumber lineInSdl, string msg, string file = __FILE__, size_t codeLine = __LINE__) { 146 super((lineInSdl == 0) ? msg : "line "~to!string(lineInSdl)~": "~msg, file, codeLine); 147 this.type = errorType; 148 this.lineInSdl = lineInSdl; 149 } 150 } 151 152 /// Holds three character arrays for an SDL attribute, namespace/id/value. 153 struct Attribute { 154 //size_t line, column; 155 const(char)[] namespace; 156 const(char)[] id; 157 const(char)[] value; 158 } 159 160 /// Contains a tag's name, values and attributes. 161 /// It does not contain any information about its child tags because that part of the sdl would not have been parsed yet, however, 162 /// it does indicate if the tag was followed by an open brace. 163 /// This struct is used directly for the StAX/SAX APIs and indirectly for the DOM or Reflection APIs. 164 struct Tag { 165 166 // A bifield of flags used to pass extra options to parseSdlTag. 167 // Used to accept/reject different types of SDL or cause parseSdlTag to 168 // behave differently like preventing it from modifying the sdl text. 169 private ubyte flags; 170 171 /// Normally SDL only allows a tag's attributes to appear after all it's values. 172 /// This flag causes parseSdlTag to allow values/attributes to appear in any order, i.e. 173 /// $(D tag attr="my-value" "another-value" # would be valid) 174 @property @safe bool allowMixedValuesAndAttributes() pure nothrow const { return (flags & 1U) != 0;} 175 @property @safe void allowMixedValuesAndAttributes(bool v) pure nothrow { if (v) flags |= 1U;else flags &= ~1U;} 176 177 /// Causes parseSdlTag to allow a tag's open brace to appear after any number of newlines 178 @property @safe bool allowBraceAfterNewline() pure nothrow const { return (flags & 2U) != 0;} 179 @property @safe void allowBraceAfterNewline(bool v) pure nothrow { if (v) flags |= 2U;else flags &= ~2U;} 180 181 /// Causes parseSdlTag to allow a child tags to appear on the same line as the parents 182 /// open and close braces 183 @property @safe bool allowChildTagsOnSameLineAsBrace() pure nothrow const { return (flags & 4U) != 0;} 184 @property @safe void allowChildTagsOnSameLineAsBrace(bool v) pure nothrow { if (v) flags |= 4U;else flags &= ~4U;} 185 186 // Causes parseSdlTag to throw an exception if it finds any number literals 187 // with postfix letters indicating the type 188 // @property @safe bool verifyTypedNumbers() pure nothrow const { return (flags & 4U) != 0;} 189 // @property @safe void verifyTypedNumbers(bool v) pure nothrow { if (v) flags |= 4U;else flags &= ~4U;} 190 191 /// Causes parseSdlTag to set the tag name to null instead of "content" for anonymous tags. 192 /// This allows the application to differentiate betweeen "content" tags and anonymous tags. 193 @property @safe bool anonymousTagNameIsNull() pure nothrow const { return (flags & 8U) != 0;} 194 @property @safe void anonymousTagNameIsNull(bool v) pure nothrow { if (v) flags |= 8U;else flags &= ~8U;} 195 196 /// Causes parseSdlTag to accept non-quoted strings 197 @property @safe bool acceptUnquotedStrings() pure nothrow const { return (flags & 16U) != 0;} 198 @property @safe void acceptUnquotedStrings(bool v) pure nothrow { if (v) flags |= 16U;else flags &= ~16U;} 199 200 /// Prevents parseSdlTag from modifying the given sdl text for things such as 201 /// processing escaped strings 202 @property @safe bool preserveSdlText() pure nothrow const { return (flags & 128U) != 0;} 203 @property @safe void preserveSdlText(bool v) pure nothrow { if (v) flags |= 128U;else flags &= ~128U;} 204 205 206 // TODO: maybe add an option to specify that any values accessed should be copied to new buffers 207 // NOTE: Do not add an option to prevent parseSdlTag from throwing exceptions when the input has ended. 208 // It may have been useful for an input buffered object, however, the buffered input object will 209 // need to know when it has a full tag anyway so the sdl will already contain the characters to end the tag. 210 // Or in the case of braces on the next line, if the tag has alot of whitespace until the actual end-of-tag 211 // delimiter, the buffered input reader can insert a semi-colon or open_brace to signify the end of the tag 212 // earlier. 213 214 215 /// For now an alias for useStrictSdl. Use this function if you want your code to always use 216 /// the default mode whatever it may become. 217 alias useStrictSdl useDefaultSdl; 218 219 /// This is the default mode. 220 /// $(OL 221 /// $(LI Causes parseSdlTag to throw SdlParseException if a tag's open brace appears after a newline) 222 /// $(LI Causes parseSdlTag to throw SdlParseException if any tag value appears after any tag attribute) 223 /// $(LI Causes parseSdlTag to set anonymous tag names to "content") 224 /// ) 225 void useStrictSdl() { 226 this.allowMixedValuesAndAttributes = false; 227 this.allowBraceAfterNewline = false; 228 this.allowChildTagsOnSameLineAsBrace = false; 229 this.anonymousTagNameIsNull = false; 230 this.acceptUnquotedStrings = false; 231 } 232 /// $(OL 233 /// $(LI Causes parseSdlTag to throw SdlParseException if a tag's open brace appears after a newline) 234 /// $(LI Causes parseSdlTag to throw SdlParseException if any tag value appears after any tag attribute) 235 /// $(LI Causes parseSdlTag to set anonymous tag names to "content") 236 /// ) 237 void useLooseSdl() { 238 this.allowMixedValuesAndAttributes = true; 239 this.allowBraceAfterNewline = true; 240 this.allowChildTagsOnSameLineAsBrace = true; 241 this.anonymousTagNameIsNull = false; 242 this.acceptUnquotedStrings = true; 243 } 244 /// $(OL 245 /// $(LI Causes parseSdlTag to allow a tag's open brace appears after any number of newlines) 246 /// $(LI Causes parseSdlTag to allow tag values an attributes to mixed in any order) 247 /// $(LI Causes parseSdlTag to set anonymous tag names to null) 248 /// ) 249 void useProposedSdl() { 250 this.allowMixedValuesAndAttributes = true; 251 this.allowBraceAfterNewline = true; 252 this.allowChildTagsOnSameLineAsBrace = true; 253 this.anonymousTagNameIsNull = true; 254 this.acceptUnquotedStrings = true; 255 } 256 257 /// The depth of the tag, all root tags are at depth 0. 258 size_t depth = 0; 259 260 /// The line number of the SDL parser after parsing this tag. 261 size_t line; 262 size_t column; 263 264 /// The namespace of the tag 265 const(char)[] namespace; 266 /// The name of the tag 267 const(char)[] name; 268 /// The values of the tag 269 auto values = appender!(const(char)[][])(); 270 /// The attributes of the tag 271 auto attributes = appender!(Attribute[])(); 272 /// Indicates the tag has an open brace 273 bool hasOpenBrace; 274 275 version(unittest) 276 { 277 // This function is only so unit tests can create Tags to compare 278 // with tags parsed from the parseSdlTag function. This constructor 279 // should never be called in production code 280 this(const(char)[] name, const(char)[][] values...) { 281 auto colonIndex = name.indexOf(':'); 282 if(colonIndex > -1) { 283 this.namespace = name[0..colonIndex]; 284 this.name = name[colonIndex+1..$]; 285 } else { 286 this.namespace.length = 0; 287 this.name = name; 288 } 289 foreach(value; values) { 290 291 const(char)[] attributeNamespace = ""; 292 size_t equalIndex = size_t.max; 293 294 // check if it is an attribute 295 if(value.length && isIDStart(value[0])) { 296 size_t i = 1; 297 while(true) { 298 if(i >= value.length) break; 299 auto c = value[i]; 300 if(!isID(value[i])) { 301 if(c == ':') { 302 if(attributeNamespace.length) throw new Exception("contained 2 colons?"); 303 attributeNamespace = value[0..i]; 304 i++; 305 continue; 306 } 307 if(value[i] == '=') { 308 equalIndex = i; 309 } 310 break; 311 } 312 i++; 313 } 314 } 315 316 if(equalIndex == size_t.max) { 317 this.values.put(value); 318 } else { 319 Attribute a = {attributeNamespace, value[attributeNamespace.length..equalIndex], value[equalIndex+1..$]}; 320 this.attributes.put(a); 321 } 322 323 } 324 } 325 } 326 327 /// Gets the tag ready to parse a new sdl tree by resetting the depth and the line number. 328 /// It is unnecessary to call this before parsing the first sdl tree but would not be harmful. 329 /// It does not reset the namespace/name/values/attributes because those will 330 /// be reset by the parser on the next call to parseSdlTag when it calls $(D resetForNextTag()). 331 void resetForNewSdl() { 332 depth = 0; 333 line = 1; 334 } 335 336 /// Resets the tag state to get ready to parse the next tag. 337 /// Should only be called by the parseSdlTag function. 338 /// This will clear the namespace/name/values/attributes and increment the depth if the current tag 339 /// had an open brace. 340 void resetForNextTag() 341 { 342 this.namespace.length = 0; 343 this.name = null; 344 if(hasOpenBrace) { 345 hasOpenBrace = false; 346 this.depth++; 347 } 348 this.values.clear(); 349 this.attributes.clear(); 350 } 351 352 /// Returns true if the tag is anonymous. 353 bool isAnonymous() 354 { 355 return anonymousTagNameIsNull ? this.name is null : this.name == "content"; 356 } 357 /// Sets the tag as anonymous 358 void setIsAnonymous() 359 { 360 this.name = anonymousTagNameIsNull ? null : "content"; 361 } 362 /// Returns: true if the tag namespaces/names/values/attributes are 363 /// the same even if the depth/line/options are different. 364 bool opEquals(ref Tag other) { 365 return 366 namespace == other.namespace && 367 name == other.name && 368 values.data == other.values.data && 369 attributes.data == other.attributes.data; 370 } 371 372 /// Returns: A string of the Tag not including it's children. The string will be valid SDL 373 /// by itself but will not include the open brace if it has one. Use toSdl for that. 374 string toString() { 375 string str = ""; 376 if(namespace.length) { 377 str ~= namespace; 378 str ~= name; 379 } 380 if(!isAnonymous || (values.data.length == 0 && attributes.data.length == 0)) { 381 str ~= name; 382 } 383 foreach(value; values.data) { 384 str ~= ' '; 385 str ~= value; 386 } 387 foreach(attribute; attributes.data) { 388 str ~= ' '; 389 if(attribute.namespace.length) { 390 str ~= attribute.namespace; 391 str ~= ':'; 392 } 393 str ~= attribute.id; 394 str ~= '='; 395 str ~= attribute.value; 396 } 397 return str; 398 } 399 400 /// Writes the tag as standard SDL to sink. 401 /// It will write the open brace '{' but since the tag does not have a knowledge 402 /// about it's children, its up to the caller to write the close brace '}' after it 403 /// writes the children to the sink. 404 void toSdl(S, string indent = " ")(S sink) if(isOutputRange!(S,const(char)[])) { 405 //writefln("[DEBUG] converting to sdl namespace=%s name=%s values=%s attr=%s", 406 //namespace, name, values.data, attributes.data); 407 for(auto i = 0; i < depth; i++) { 408 sink.put(indent); 409 } 410 if(namespace.length) { 411 sink.put(namespace); 412 sink.put(":"); 413 } 414 if(!isAnonymous || (values.data.length == 0 && attributes.data.length == 0)) { 415 if(namespace.length == 0 && isSdlKeyword(name)) { 416 sink.put(":"); // Escape tag names that are keywords 417 } 418 sink.put(name); 419 } 420 foreach(value; values.data) { 421 sink.put(" "); 422 sink.put(value); 423 } 424 foreach(attribute; attributes.data) { 425 sink.put(" "); 426 if(attribute.namespace.length) { 427 sink.put(attribute.namespace); 428 sink.put(":"); 429 } 430 sink.put(attribute.id); 431 sink.put("="); 432 sink.put(attribute.value); 433 } 434 if(hasOpenBrace) { 435 sink.put(" {\n"); 436 } else { 437 sink.put("\n"); 438 } 439 } 440 441 442 443 444 // 445 // User Methods 446 // 447 void throwIsUnknown() { 448 throw new SdlParseException(line, format("unknown tag '%s'", name)); 449 } 450 void throwIsDuplicate() { 451 throw new SdlParseException(line, format("tag '%s' appeared more than once", name)); 452 } 453 void getOneValue(T)(ref T value) { 454 if(values.data.length != 1) { 455 throw new SdlParseException 456 (line,format("tag '%s' %s 1 value but had %s", 457 name, (values.data.length == 0) ? "must have at least" : "can only have", values.data.length)); 458 } 459 460 const(char)[] literal = values.data[0]; 461 462 463 static if( isSomeString!T ) { 464 465 if(!value.empty) throwIsDuplicate(); 466 467 } else static if( isIntegral!T || isFloatingPoint!T ) { 468 469 //if( value != 0 ) throwIsDuplicate(); 470 471 } else { 472 473 } 474 475 if(!sdlLiteralToD!(T)(literal, value)) throw new SdlParseException(line, format("cannot convert '%s' to %s", literal, typeid(T))); 476 } 477 478 void getValues(T, bool allowAppend=false)(ref T[] t, size_t minCount = 1) { 479 if(values.data.length < minCount) throw new SdlParseException(line, format("tag '%s' must have at least %s value(s)", name, minCount)); 480 481 size_t arrayOffset; 482 if(t.ptr is null) { 483 arrayOffset = 0; 484 t = new T[values.data.length]; 485 } else if(allowAppend) { 486 arrayOffset = t.length; 487 t.length += values.data.length; 488 } else throwIsDuplicate(); 489 490 foreach(literal; values.data) { 491 static if( isSomeString!T ) { 492 if(literal[0] != '"') throw new SdlParseException(line, format("tag '%s' must have exactly one string literal but had another literal type", name)); 493 t[arrayOffset++] = literal[1..$-1]; // remove surrounding quotes 494 } else { 495 assert(0, format("Cannot convert sdl literal to D '%s' type", typeid(T))); 496 } 497 } 498 } 499 500 501 void enforceNoValues() { 502 if(values.data.length) throw new SdlParseException(line, format("tag '%s' cannot have any values", name)); 503 } 504 void enforceNoAttributes() { 505 if(attributes.data.length) throw new SdlParseException(line, format("tag '%s' cannot have any attributes", name)); 506 } 507 void enforceNoChildren() { 508 if(hasOpenBrace) throw new SdlParseException(line, format("tag '%s' cannot have any children", name)); 509 } 510 511 512 } 513 514 version = use_lookup_tables; 515 516 bool isSdlKeyword(const char[] token) { 517 return 518 token == "null" || 519 token == "true" || 520 token == "false" || 521 token == "on" || 522 token == "off"; 523 } 524 bool isIDStart(dchar c) { 525 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; 526 /+ 527 The lookup table doesn't seem to be as fast here, maybe this case I should just compare the ranges 528 version(use_lookup_tables) { 529 return (c < sdlLookup.length) ? ((sdlLookup[c] & idStartFlag) != 0) : false; 530 } else { 531 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; 532 } 533 +/ 534 } 535 bool isID(dchar c) { 536 version(use_lookup_tables) { 537 return (c < sdlLookup.length) ? ((sdlLookup[c] & sdlIDFlag) != 0) : false; 538 } else { 539 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c == '.' || c == '$'; 540 } 541 } 542 enum tooManyEndingBraces = "too many ending braces '}'"; 543 enum noEndingQuote = "string missing ending quote"; 544 enum invalidBraceFmt = "found '{' on a different line than its tag '%s'. fix the sdl by moving '{' to the same line"; 545 enum mixedValuesAndAttributesFmt = "SDL values cannot appear after attributes, bring '%s' in front of the attributes for tag '%s'"; 546 enum notEnoughCloseBracesFmt = "reached end of sdl but missing %s close brace(s) '}'"; 547 548 549 struct SdlParser(A) 550 { 551 char[] buffer; 552 A allocator; 553 char[] leftover; 554 this(char[] buffer, A allocator) 555 { 556 this.buffer = buffer; 557 this.allocator = allocator; 558 } 559 ref Sink parse(Source,Sink)(Source source, Sink sink) 560 if (isInputRange!Source && 561 isOutputRange!(Sink, ElementType!Source)) 562 { 563 // todo implement 564 } 565 } 566 567 568 569 /// Converts literal to the given D type T. 570 /// This is a wrapper arround the $(D sdlLiteralToD) function that returns true on sucess, except 571 /// this function returns the value itself and throws an SdlParseException on error. 572 T sdlLiteralToD(T)(const(char)[] literal) { 573 T value; 574 if(!sdlLiteralToD!(T)(literal, value)) 575 throw new SdlParseException(format("failed to convert '%s' to a %s", literal, typeid(T))); 576 return value; 577 } 578 579 /// Converts literal to the given D type T. 580 /// If isSomeString!T, then it will remove the surrounding quotes if they are present. 581 /// Returns: true on succes, false on failure 582 bool sdlLiteralToD(T)(const(char)[] literal, ref T t) { 583 584 assert(literal.length); 585 586 587 static if( is( T == bool) ) { 588 589 if(literal == "true" || literal == "on" || literal == "1") t = true; 590 if(literal == "false" || literal == "off" || literal == "0") t = false; 591 592 } else static if( isSomeString!T ) { 593 594 if(literal[0] == '"' && literal.length > 1 && literal[$-1] == '"') { 595 t = cast(T)literal[1..$-1]; 596 } else { 597 t = cast(T)literal; 598 } 599 600 } else static if( isIntegral!T || isFloatingPoint!T ) { 601 602 // remove any postfix characters 603 while(true) { 604 char c = literal[$-1]; 605 if(c >= '0' && c <= '9') break; 606 literal.length--; 607 if(literal.length == 0) return false; 608 } 609 610 t = to!T(literal); 611 612 } else { 613 614 t = to!T(literal); 615 616 } 617 618 return true; 619 } 620 621 622 623 624 625 string arrayRange(char min, char max, string initializer) { 626 string initializers = ""; 627 for(char c = min; c < max; c++) { 628 initializers ~= "'"~c~"': "~initializer~",\n"; 629 } 630 initializers ~= "'"~max~"': "~initializer; 631 return initializers; 632 } 633 string rangeInitializers(string[] s...) { 634 if(s.length % 2 != 0) assert(0, "must supply an even number of arguments to rangeInitializers"); 635 string code = "["~rangeInitializersCurrent(s); 636 //assert(0, code); // uncomment to see the code 637 return code; 638 } 639 string rangeInitializersCurrent(string[] s) { 640 string range = s[0]; 641 if(range[0] == '\'') { 642 if(range.length == 3 || (range.length == 4 && range[1] == '\\')) { 643 if(range[$-1] != '\'') throw new Exception(format("a single-character range %s started with an apostrophe (') but did not end with one", range)); 644 return range ~ ":" ~ s[1] ~ rangeInitializersNext(s); 645 } 646 } else { 647 throw new Exception(format("range '%s' not supported", range)); 648 } 649 char min = range[1]; 650 char max = range[5]; 651 return arrayRange(min, max, s[1]) ~ rangeInitializersNext(s); 652 } 653 string rangeInitializersNext(string[] s...) { 654 if(s.length <= 2) return "]"; 655 return ",\n"~rangeInitializersCurrent(s[2..$]); 656 } 657 658 659 enum ubyte sdlIDFlag = 0x01; 660 enum ubyte sdlNumberFlag = 0x02; 661 enum ubyte sdlNumberPostfixFlag = 0x04; 662 enum ubyte sdlValidAfterTagItemFlag = 0x08; 663 664 version(use_lookup_tables) { 665 mixin("private __gshared ubyte[256] sdlLookup = "~rangeInitializers 666 ("'_'" , "sdlIDFlag", 667 668 `'a'` , "sdlIDFlag", 669 `'b'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 670 `'c'` , "sdlIDFlag", 671 `'d'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 672 `'e'` , "sdlIDFlag", 673 `'f'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 674 `'g'-'k'`, "sdlIDFlag", 675 `'l'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 676 `'m'-'z'`, "sdlIDFlag", 677 678 `'A'` , "sdlIDFlag", 679 `'B'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 680 `'C'` , "sdlIDFlag", 681 `'D'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 682 `'E'` , "sdlIDFlag", 683 `'F'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 684 `'G'-'K'`, "sdlIDFlag", 685 `'L'` , "sdlIDFlag | sdlNumberFlag | sdlNumberPostfixFlag", 686 `'M'-'Z'`, "sdlIDFlag", 687 688 `'0'-'9'`, "sdlIDFlag | sdlNumberFlag", 689 `'-'` , "sdlIDFlag", 690 `'.'` , "sdlIDFlag | sdlNumberFlag", 691 `'$'` , "sdlIDFlag", 692 693 `' '` , "sdlValidAfterTagItemFlag", 694 `'\t'` , "sdlValidAfterTagItemFlag", 695 `'\n'` , "sdlValidAfterTagItemFlag", 696 `'\v'` , "sdlValidAfterTagItemFlag", 697 `'\f'` , "sdlValidAfterTagItemFlag", 698 `'\r'` , "sdlValidAfterTagItemFlag", 699 700 `'{'` , "sdlValidAfterTagItemFlag", 701 `'}'` , "sdlValidAfterTagItemFlag", 702 `';'` , "sdlValidAfterTagItemFlag", 703 `'\\'` , "sdlValidAfterTagItemFlag", 704 `'/'` , "sdlValidAfterTagItemFlag", 705 `'#'` , "sdlValidAfterTagItemFlag", 706 707 708 )~";"); 709 } 710 711 /// A convenience function to parse a single tag. 712 /// Calls $(D tag.resetForNewSdl) and then calls $(D parseSdlTag). 713 void parseOneSdlTag(Tag* tag, char[] sdlText) { 714 tag.resetForNewSdl(); 715 if(!parseSdlTag(tag, &sdlText)) throw new SdlParseException(tag.line, format("The sdl text '%s' did not contain any tags", sdlText)); 716 } 717 718 /// Parses one SDL tag (not including its children) from sdlText saving slices for every 719 /// name/value/attribute to the given tag struct. 720 /// This function assumes that sdlText contains at least one full SDL _tag. 721 /// The only time this function will allocate memory is if the value/attribute appenders 722 /// in the tag struct are not large enough to hold all the values. 723 /// Because of this, after the tag values/attributes are populated, it is up to the caller to copy 724 /// any memory they wish to save unless sdlText is going to persist in memory. 725 /// Note: this function does not handle the UTF-8 bom because it doesn't make sense to re-check 726 /// for the BOM before every tag. 727 /// Params: 728 /// tag = An address to a Tag structure to save the sdl tag's name/values/attributes. 729 /// sdlText = An address to the sdl text character array. 730 /// the function will move the front of the slice foward past 731 /// any sdl that was parsed. 732 /// Returns: true if a tag was found, false otherwise 733 /// Throws: SdlParseException or Utf8Exception 734 bool parseSdlTag(Tag* tag, char[]* sdlText) 735 { 736 // developer note: 737 // whenever reading the next character, the next pointer must be saved to cpos 738 // if the character could be used later, but if the next is guaranteed to 739 // be thrown away (such as when skipping till the next newline after a comment) 740 // then cpos does not need to be saved. 741 char *next = (*sdlText).ptr; 742 char *limit = next + sdlText.length; 743 744 745 tag.resetForNextTag(); // make sure this is done first 746 747 char* cpos; 748 dchar c; 749 char[] tokenNamespace; 750 char[] attributeID; 751 char[] token; 752 753 void enforceNoMoreTags() { 754 if(tag.depth > 0) throw new SdlParseException(tag.line, format(notEnoughCloseBracesFmt, tag.depth)); 755 } 756 757 void readNext() 758 { 759 cpos = next; 760 c = decodeUtf8(&next, limit); 761 } 762 763 bool isIDStart() { 764 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; 765 /+ 766 The lookup table actually seems to be slower in this case 767 version(use_lookup_tables) { 768 return (c < sdlLookup.length) ? ((sdlLookup[c] & idStartFlag) != 0) : false; 769 } else { 770 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; 771 } 772 +/ 773 } 774 bool isID() { 775 version(use_lookup_tables) { 776 return c < sdlLookup.length && ((sdlLookup[c] & sdlIDFlag) != 0); 777 } else { 778 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c == '.' || c == '$'; 779 } 780 } 781 bool isValidAfterTagItem() { 782 version(use_lookup_tables) { 783 return c < sdlLookup.length && ((sdlLookup[c] & sdlValidAfterTagItemFlag) != 0); 784 } else { 785 implement("isValidAfterTagItem without lookup table"); 786 } 787 } 788 bool isNumber() { 789 version(use_lookup_tables) { 790 return c < sdlLookup.length && ((sdlLookup[c] & sdlNumberFlag) != 0); 791 } else { 792 implement("isNumber without lookup table"); 793 } 794 } 795 bool isNumberPostfix() { 796 version(use_lookup_tables) { 797 return c < sdlLookup.length && ((sdlLookup[c] & sdlNumberPostfixFlag) != 0); 798 } else { 799 implement("isNumberPostfix without lookup table"); 800 } 801 } 802 803 // expected c/cpos to b pointing at a character before newline, so will ready first 804 // before checking for newlines 805 void toNextLine() 806 { 807 while(true) { 808 if(next >= limit) { return; } 809 c = decodeUtf8(&next, limit); // no need to save cpos since c will be thrown away 810 if(c == '\n') { tag.line++; return; } 811 } 812 } 813 814 // expects c/cpos to point at the first character of the id and for it to already be checked 815 // when this function is done, c/cpos will pointing to the first character after the id, or 816 // cpos == limit if there are no characters after the id 817 void parseID() 818 { 819 while(true) { 820 if(next >= limit) { cpos = limit; return; } 821 readNext(); 822 if(!isID()) return; 823 } 824 } 825 826 827 // expects c/cpos to point at the first character after the id 828 // Returns: true if the id is actually a value 829 // NOTE: this should only becaused if no namespace was found yet, this function 830 // will always return false if c/cpos is pointing to a ':' which indicates 831 // that it is a namespace even if the namespace could be a value like null or true/false 832 bool currentIDIsValue(char* startOfID) { 833 //switch on the length 834 switch(cpos - startOfID) { 835 case 0-1: return false; 836 case 2: return startOfID[0..2] == "on"; 837 case 3: return startOfID[0..3] == "off"; 838 case 4: return startOfID[0..4] == "null" || 839 startOfID[0..4] == "true"; 840 case 5: return startOfID[0..5] == "false"; 841 default: return false; 842 } 843 } 844 845 // Returns: true if a newline was found 846 // ExpectedState: 847 // c/cpos: points to the first character of the potential whitespace/comment 848 // ReturnState: 849 // c/cpos: points to the first character after all the whitespace/comments 850 bool skipWhitespaceAndComments() 851 { 852 LineNumber lineBefore = tag.line; 853 854 while(true) { 855 856 // TODO: maybe use a lookup table here 857 if(c == ' ' || c == '\t' || c =='\v' || c == '\f' || c == '\r') { 858 859 // do nothing (check first as this is the most likely case) 860 861 } else if(c == '\n') { 862 863 tag.line++; 864 865 } else if(c == '#') { 866 867 toNextLine(); 868 869 } else if(c == '-' || c == '/') { 870 871 if(next >= limit) return tag.line > lineBefore; 872 873 dchar secondChar = decodeUtf8(&next, limit); 874 875 if(secondChar == c) { // '--' or '//' 876 877 toNextLine(); 878 879 } else if(secondChar == '*') { 880 881 MULTILINE_COMMENT_LOOP: 882 while(next < limit) { 883 884 c = decodeUtf8(&next, limit); // no need to save cpos since c will be thrown away 885 if(c == '\n') { 886 tag.line++; 887 } else if(c == '*') { 888 // loop assume c is pointing to a '*' and next is pointing to the next characer 889 while(next < limit) { 890 891 c = decodeUtf8(&next, limit); 892 if(c == '/') break MULTILINE_COMMENT_LOOP; 893 if(c == '\n') { 894 tag.line++; 895 } else if(c != '*') { 896 break; 897 } 898 } 899 } 900 } 901 902 } else { 903 return tag.line > lineBefore; 904 } 905 906 } else { 907 return tag.line > lineBefore; 908 } 909 910 // 911 // Goto next character 912 // 913 if(next >= limit) {cpos = next; break;} 914 readNext(); 915 } 916 917 return tag.line > lineBefore; 918 } 919 920 921 // The atFirstToken will only parse tokens that are valid IDs 922 // Expected State: 923 // tokenNamespace/attributeID to be empty (sets these if they are found) 924 // c/cpos: points at the first character of the token 925 // Return State: 926 // If it finds a token, will set tokenNamespace and/or token with values 927 // c/cpos: points to the next character after the literal 928 // next: character after cpos 929 void tryParseToken(bool atFirstToken) { 930 token.length = 0; // clear the previous token 931 932 // Handles 933 // tag name 934 // tag namespace/ tag name 935 // unquoted string 936 // attribute 937 // attribute with namespace, does not handle the value after the attribute 938 if(isIDStart() || c == ':') { // colon for empty namespaces 939 auto start = cpos; 940 941 if(c != ':') { 942 while(true) { 943 cpos = next; 944 if(next >= limit) { token = start[0..next-start]; return; } 945 c = decodeUtf8(&next, limit); 946 if(!isID()) break; 947 } 948 } 949 950 // Handle Namespaces 951 if(c == ':') { 952 953 tokenNamespace = start[0..next-start]; // include the colon so the calling function 954 // can differentiate between the empty namespace 955 // and no namespace 956 cpos = next; 957 if(next >= limit) { token.length = 0; return; } 958 c = decodeUtf8(&next, limit); 959 960 if(!isIDStart()) { 961 if(atFirstToken) { 962 if(!isValidAfterTagItem()) throw new SdlParseException 963 (tag.line, format("Character '%s' cannot appear after the tag's namespace", c)); 964 return; 965 } 966 throw new SdlParseException(tag.line, 967 format("expected alphanum or '_' after an attribute namespace colon, but got '%s'", c)); 968 } 969 970 start = cpos; 971 while(true) { 972 cpos = next; 973 if(next >= limit) { token = start[0..next-start]; return; } 974 c = decodeUtf8(&next, limit); 975 if(!isID()) break; 976 } 977 978 } 979 980 // Handle attributes 981 if(c != '=') { 982 if(!isValidAfterTagItem()) throw new SdlParseException 983 (tag.line, format("Character '%s' cannot appear after tag item", c)); 984 token = start[0..next-start-1]; 985 return; 986 } 987 988 // Setup to parse the value 989 attributeID = start[0..next-start-1]; // subtract 1 for the '=' 990 if(next >= limit) throw new SdlParseException 991 (tag.line, format("sdl cannot end with '=' character")); 992 cpos = next; 993 c = decodeUtf8(&next, limit); 994 } 995 996 997 // Handles: keywords and, 998 // unquoted strings in attribute values 999 if(isIDStart()) { 1000 auto start = cpos; 1001 1002 while(true) { 1003 cpos = next; 1004 if(next >= limit) { token = start[0..next-start]; return; } 1005 c = decodeUtf8(&next, limit); 1006 if(!isID()) break; 1007 } 1008 1009 if(!isValidAfterTagItem()) throw new SdlParseException 1010 (tag.line, format("Character '%s' cannot appear after a tag item", c)); 1011 1012 token = start[0..next-start-1]; 1013 return; 1014 } 1015 1016 if(atFirstToken) return; 1017 1018 if(c == '"') { 1019 1020 bool containsEscapes = false; 1021 1022 while(true) { 1023 1024 if(next >= limit) throw new SdlParseException(tag.line, noEndingQuote); 1025 c = decodeUtf8(&next, limit); // no need to save cpos since c will be thrown away 1026 if(c == '"') break; 1027 if(c == '\\') { 1028 containsEscapes = true; 1029 if(next >= limit) throw new SdlParseException(tag.line, noEndingQuote); 1030 // NOTE TODO: remember to handle escaped newlines 1031 c = decodeUtf8(&next, limit); 1032 } else if(c == '\n') { 1033 throw new SdlParseException(tag.line, noEndingQuote); 1034 } 1035 1036 } 1037 1038 if(containsEscapes) { 1039 1040 /* do something differnt if immuable */ 1041 implement("escaped strings"); 1042 1043 } else { 1044 token = cpos[0..next - cpos]; 1045 } 1046 1047 cpos = next; 1048 if( next < limit) c = decodeUtf8(&next, limit); 1049 1050 } else if(c == '`') { 1051 1052 implement("tick strings"); 1053 1054 } else if(c >= '0' && c <= '9' || c == '-' || c == '.') { 1055 1056 auto start = cpos; 1057 1058 while(true) { 1059 cpos = next; 1060 if(next >= limit) { token = start[0..cpos-start]; break; } 1061 c = decodeUtf8(&next, limit); 1062 if(!isNumber()) { token = start[0..cpos-start]; break; } 1063 1064 //if(tag.rejectTypedNumbers && isNumberPostfix()) 1065 //throw new SdlParseException(tag.line, "using this sdl mode, postfix characters indicating the type after a number are not allowed"); 1066 } 1067 1068 } else if(c == '\'') { 1069 1070 implement("sing-quoted characters"); 1071 1072 } 1073 } 1074 1075 // 1076 // Read the first character 1077 // 1078 if(next >= limit) { enforceNoMoreTags(); goto RETURN_NO_TAG; } 1079 readNext(); 1080 1081 while(true) { 1082 1083 skipWhitespaceAndComments(); 1084 if(cpos >= limit) { enforceNoMoreTags(); goto RETURN_NO_TAG; } 1085 1086 // 1087 // 1088 // Get the tag name/namespace 1089 // 1090 tokenNamespace.length = 0; 1091 attributeID.length = 0; 1092 tryParseToken(true); 1093 1094 if(token.length || tokenNamespace.length) { 1095 1096 if(attributeID.length) { 1097 1098 tag.setIsAnonymous(); 1099 1100 if(tokenNamespace.length) tokenNamespace = tokenNamespace[0..$-1]; // Remove ending ':' on namespace 1101 Attribute attribute = {tokenNamespace, attributeID, token}; 1102 tag.attributes.put(attribute); 1103 1104 } else { 1105 1106 if(tokenNamespace.length) { 1107 tag.namespace = tokenNamespace[0..$-1]; 1108 if(token.length) { 1109 tag.name = token; 1110 } else { 1111 tag.setIsAnonymous(); 1112 if(tag.namespace.length == 0) 1113 throw new SdlParseException(tag.line, "A tag cannot have an empty namespace and an empty name"); 1114 } 1115 } else { 1116 1117 if(isSdlKeyword(token)) { 1118 tag.setIsAnonymous(); 1119 tag.values.put(token); 1120 } else { 1121 tag.name = token; 1122 } 1123 1124 } 1125 1126 } 1127 1128 } else if(c == '}') { 1129 1130 if(tag.depth == 0) throw new SdlParseException(tag.line, tooManyEndingBraces); 1131 tag.depth--; 1132 1133 // Read the next character 1134 if(next >= limit) { enforceNoMoreTags(); goto RETURN_NO_TAG; } 1135 cpos = next; 1136 c = decodeUtf8(&next, limit); 1137 1138 continue; 1139 1140 } else if(c == '\\') { 1141 throw new SdlParseException(tag.line, "expected tag or '}' but got backslash '\\'"); 1142 } else if(c == '{') { 1143 throw new SdlParseException(tag.line, "expected tag or '}' but got '{'"); 1144 } else if(c == ':') { 1145 throw new SdlParseException(tag.line, "expected tag or '}' but got ':'"); 1146 } else { 1147 1148 tag.namespace.length = 0; 1149 tag.setIsAnonymous(); 1150 1151 } 1152 1153 // 1154 // 1155 // Found a valid tag, now get values and attributes 1156 // 1157 // 1158 GET_VALUES_AND_ATTRIBUTES: 1159 while(true) { 1160 1161 // At the beginning of this loop, it is expected that c/cpos will be pointing the 1162 // next character after the last thing (tag/value/attribute) 1163 1164 if(cpos >= limit) goto RETURN_TAG; // I may not need this check 1165 auto foundNewline = skipWhitespaceAndComments(); 1166 if(cpos >= limit) goto RETURN_TAG; 1167 if(foundNewline) { 1168 1169 // check if it is a curly brace to either print a useful error message 1170 // or determine if the tag has children 1171 if(c != '{') { 1172 next = cpos; // rewind so whatever character it is will 1173 // be parsed again on the next call 1174 goto RETURN_TAG; 1175 } 1176 if(tag.allowBraceAfterNewline) { 1177 tag.hasOpenBrace = true; 1178 goto RETURN_TAG; 1179 } 1180 1181 throw new SdlParseException(SdlErrorType.braceAfterNewline, tag.line, 1182 format(invalidBraceFmt, tag.name)); 1183 } 1184 1185 // 1186 // At this point c must contain a non-whitespace character 1187 // and we must have already parsed the tag name 1188 // 1189 1190 if(c == ';') goto RETURN_TAG; 1191 1192 // 1193 // Handle the '\' character to escape newlines 1194 // 1195 if(c == '\\') { 1196 if(next >= limit) goto RETURN_TAG; // (check to make sure ending an sdl file with a backslash is ok) 1197 c = decodeUtf8(&next, limit); 1198 1199 foundNewline = skipWhitespaceAndComments(); 1200 if(cpos >= limit) goto RETURN_TAG; 1201 if(!foundNewline) throw new SdlParseException(tag.line, "only comments/whitespace can follow a backslash '\\'"); 1202 1203 continue; 1204 } 1205 1206 if(c == '{') { 1207 tag.hasOpenBrace = true; // depth will be incremented at the next parse 1208 goto RETURN_TAG; 1209 } 1210 1211 if(c == '}') { 1212 if(tag.depth == 0) throw new SdlParseException(tag.line, tooManyEndingBraces); 1213 next = cpos; // rewind so the '}' will be seen on the next call and 1214 // the depth will change on the next call 1215 goto RETURN_TAG; 1216 } 1217 1218 // 1219 // Try to parse an attribute 1220 // 1221 tokenNamespace.length = 0; 1222 attributeID.length = 0; 1223 tryParseToken(false); 1224 if(token.length || tokenNamespace.length) { 1225 1226 if(attributeID.length > 0) { 1227 if(tokenNamespace.length) tokenNamespace = tokenNamespace[0..$-1]; // Remove ending ':' on namespace 1228 Attribute attribute = {tokenNamespace, attributeID, token}; 1229 tag.attributes.put(attribute); 1230 } else { 1231 1232 if(tokenNamespace.length) 1233 throw new SdlParseException(tag.line, format("Found a tag value with a namespace '%s%s'?", tokenNamespace, token)); 1234 1235 if(tag.attributes.data.length) { 1236 if(!tag.allowMixedValuesAndAttributes) 1237 throw new SdlParseException(SdlErrorType.mixedValuesAndAttributes, tag.line, 1238 format(mixedValuesAndAttributesFmt, token, tag.name)); 1239 } 1240 1241 tag.values.put(token); 1242 } 1243 1244 if(cpos >= limit) goto RETURN_TAG; 1245 1246 } else { 1247 1248 if(attributeID.length > 0) throw new SdlParseException(tag.line, "expected sdl literal to follow attribute '=' but was not a literal"); 1249 1250 if(c == '\0') throw new Exception("possible code bug: found null"); 1251 throw new Exception(format("Unhandled character '%s' (code=0x%x)", c, cast(uint)c)); 1252 1253 } 1254 } 1255 1256 } 1257 1258 assert(0); 1259 1260 RETURN_TAG: 1261 *sdlText = next[0..limit-next]; 1262 return true; 1263 1264 RETURN_NO_TAG: 1265 (*sdlText) = limit[0..0]; 1266 return false; 1267 } 1268 1269 version(unittest) 1270 { 1271 char[2048] sdlBuffer; 1272 char[sdlBuffer.length] sdlBuffer2; 1273 char[] setupSdlText(const(char[]) sdlText, bool copySdl) 1274 { 1275 if(!copySdl) return cast(char[])sdlText; 1276 1277 if(sdlText.length >= sdlBuffer.length) throw new Exception(format("attempting to copy sdl of length %s but sdlBuffer is only of length %s", sdlText.length, sdlBuffer.length)); 1278 sdlBuffer[0..sdlText.length] = sdlText; 1279 return sdlBuffer[0..sdlText.length]; 1280 } 1281 1282 struct SdlBuffer2Sink 1283 { 1284 size_t offset; 1285 @property 1286 char[] slice() { return sdlBuffer2[0..offset]; } 1287 void put(inout(char)[] value) { 1288 sdlBuffer2[offset..offset+value.length] = value; 1289 offset += value.length; 1290 } 1291 } 1292 1293 } 1294 1295 1296 unittest 1297 { 1298 //return; // Uncomment to disable these tests 1299 1300 mixin(scopedTest!"SdlParse"); 1301 1302 Tag parsedTag; 1303 1304 void useProposed() { 1305 debug writefln("[TEST] SdlMode: Proposed"); 1306 parsedTag.useProposedSdl(); 1307 } 1308 void useStrict() { 1309 debug writefln("[TEST] SdlMode: Strict"); 1310 parsedTag.useStrictSdl(); 1311 } 1312 struct SdlTest 1313 { 1314 bool copySdl; 1315 string sdlText; 1316 Tag[] expectedTags; 1317 size_t line; 1318 this(string sdlText, Tag[] expectedTags, size_t line = __LINE__) { 1319 this.copySdl = false; 1320 this.sdlText = sdlText; 1321 this.expectedTags = expectedTags; 1322 this.line = line; 1323 } 1324 } 1325 1326 void testParseSdl(bool reparse = true)(bool copySdl, const(char)[] sdlText, Tag[] expectedTags = [], size_t line = __LINE__) 1327 { 1328 size_t previousDepth = size_t.max; 1329 SdlBuffer2Sink buffer2Sink; 1330 1331 auto escapedSdlText = escape(sdlText); 1332 1333 debug { 1334 static if(reparse) { 1335 writefln("[TEST] testing sdl '%s'", escapedSdlText); 1336 } else { 1337 writefln("[TEST] testing sdl (regenerated)'%s'", escapedSdlText); 1338 } 1339 } 1340 1341 char[] next = setupSdlText(sdlText, copySdl); 1342 1343 parsedTag.resetForNewSdl(); 1344 1345 1346 try { 1347 1348 for(auto i = 0; i < expectedTags.length; i++) { 1349 if(!parseSdlTag(&parsedTag, &next)) { 1350 writefln("Expected %s tag(s) but only got %s", expectedTags.length, i); 1351 writefln("Error: test on line %s", line); 1352 assert(0); 1353 } 1354 1355 static if(reparse) { 1356 if(previousDepth != size_t.max) { 1357 while(previousDepth > parsedTag.depth) { 1358 buffer2Sink.put("}"); 1359 previousDepth--; 1360 } 1361 } 1362 } 1363 1364 auto expectedTag = expectedTags[i]; 1365 if(parsedTag.namespace != expectedTag.namespace) { 1366 writefln("Error: expected tag namespace '%s' but got '%s'", expectedTag.namespace, parsedTag.namespace); 1367 writefln("Error: test on line %s", line); 1368 assert(0); 1369 } 1370 if(parsedTag.name != expectedTag.name) { 1371 writefln("Error: expected tag name '%s' but got '%s'", expectedTag.name, parsedTag.name); 1372 writefln("Error: test on line %s", line); 1373 assert(0); 1374 } 1375 //writefln("[DEBUG] expected value '%s', actual values '%s'", expectedTag.values.data, parsedTag.values.data); 1376 if(parsedTag.values.data != expectedTag.values.data) { 1377 writefln("Error: expected tag values '%s' but got '%s'", expectedTag.values.data, parsedTag.values.data); 1378 writefln("Error: test on line %s", line); 1379 assert(0); 1380 } 1381 if(parsedTag.attributes.data != expectedTag.attributes.data) { 1382 writefln("Error: expected tag attributes '%s' but got '%s'", expectedTag.attributes.data, parsedTag.attributes.data); 1383 writefln("Error: test on line %s", line); 1384 assert(0); 1385 } 1386 1387 // put the tag into the buffer2 sink to reparse again after 1388 static if(reparse) { 1389 parsedTag.toSdl(&buffer2Sink); 1390 previousDepth = parsedTag.depth; 1391 if(parsedTag.hasOpenBrace) previousDepth++; 1392 } 1393 } 1394 1395 if(parseSdlTag(&parsedTag, &next)) { 1396 writefln("Expected %s tag(s) but got at least one more (depth=%s, name='%s')", 1397 expectedTags.length, parsedTag.depth, parsedTag.name); 1398 writefln("Error: test on line %s", line); 1399 assert(0); 1400 } 1401 1402 } catch(SdlParseException e) { 1403 writefln("[TEST] this sdl threw an unexpected SdlParseException: '%s'", escape(sdlText)); 1404 writeln(e); 1405 writefln("Error: test on line %s", line); 1406 assert(0); 1407 } catch(Exception e) { 1408 writefln("[TEST] this sdl threw an unexpected Exception: '%s'", escape(sdlText)); 1409 writeln(e); 1410 writefln("Error: test on line %s", line); 1411 assert(0); 1412 } 1413 1414 static if(reparse) { 1415 if(previousDepth != size_t.max) { 1416 while(previousDepth > parsedTag.depth) { 1417 buffer2Sink.put("}"); 1418 previousDepth--; 1419 } 1420 } 1421 1422 if(buffer2Sink.slice != sdlText && 1423 (buffer2Sink.slice.length && buffer2Sink.slice[0..$-1] != sdlText)) { 1424 testParseSdl!false(false, buffer2Sink.slice, expectedTags, line); 1425 } 1426 } 1427 1428 } 1429 1430 void testInvalidSdl(bool copySdl, const(char)[] sdlText, SdlErrorType expectedErrorType = SdlErrorType.unknown, size_t line = __LINE__) { 1431 auto escapedSdlText = escape(sdlText); 1432 debug writefln("[TEST] testing invalid sdl '%s'", escapedSdlText); 1433 1434 SdlErrorType actualErrorType = SdlErrorType.unknown; 1435 1436 char[] next = setupSdlText(sdlText, copySdl); 1437 1438 parsedTag.resetForNewSdl(); 1439 try { 1440 while(parseSdlTag(&parsedTag, &next)) { } 1441 writefln("Error: invalid sdl was successfully parsed: %s", sdlText); 1442 writefln("Error: test was on line %s", line); 1443 assert(0); 1444 } catch(SdlParseException e) { 1445 debug writefln("[TEST] got expected error: %s", e.msg); 1446 actualErrorType = e.type; 1447 } catch(Utf8Exception e) { 1448 debug writefln("[TEST] got expected error: %s", e.msg); 1449 } 1450 1451 if(expectedErrorType != SdlErrorType.unknown && 1452 expectedErrorType != actualErrorType) { 1453 writefln("Error: expected error '%s' but got error '%s'", expectedErrorType, actualErrorType); 1454 writefln("Error: test was on line %s", line); 1455 assert(0); 1456 } 1457 1458 } 1459 1460 testParseSdl(false, ""); 1461 testParseSdl(false, " "); 1462 testParseSdl(false, "\n"); 1463 1464 testParseSdl(false, "#Comment"); 1465 testParseSdl(false, "#Comment copyright \u00a8"); 1466 testParseSdl(false, "#Comment\n"); 1467 testParseSdl(false, "#Comment\r\n"); 1468 testParseSdl(false, " # Comment\r\n"); 1469 1470 testParseSdl(false, " -- Comment\n"); 1471 testParseSdl(false, " ------ Comment\n"); 1472 1473 testParseSdl(false, " # Comment1 \r\n -- Comment 2"); 1474 1475 1476 testParseSdl(false, " // Comment\n"); 1477 testParseSdl(false, " //// Comment\n"); 1478 1479 testParseSdl(false, "/* a multiline comment \n\r\n\n\n\t hello stuff # -- // */"); 1480 1481 // TODO: test this using the allowBracesAfterNewline option 1482 // testParseSdl(false, "tag /*\n\n*/{ child }", Tag("tag"), Tag("child")); 1483 1484 1485 testParseSdl(false, "a", [Tag("a")]); 1486 testParseSdl(false, "ab", [Tag("ab")]); 1487 testParseSdl(false, "abc", [Tag("abc")]); 1488 testParseSdl(false, "firsttag", [Tag("firsttag")]); 1489 testParseSdl(false, "funky._-$tag", [Tag("funky._-$tag")]); 1490 1491 1492 { 1493 auto prefixes = ["", " ", "\t", "--comment\n"]; 1494 foreach(prefix; prefixes) { 1495 testInvalidSdl(false, prefix~":"); 1496 } 1497 } 1498 1499 1500 auto validCharactersAfterTag = 1501 [" ", "\t", "\n", "\v", "\f", "\r", ";", "//", "\\", "#", "{}"]; 1502 1503 1504 auto namespaces = ["a:", "ab:", "abc:"]; 1505 bool isProposedSdl = false; 1506 while(true) { 1507 string tagName; 1508 if(isProposedSdl) { 1509 tagName = null; 1510 useProposed(); 1511 } else { 1512 tagName = "content"; 1513 } 1514 foreach(namespace; namespaces) { 1515 testParseSdl(false, namespace, [Tag(namespace~tagName)]); 1516 foreach(suffix; validCharactersAfterTag) { 1517 testParseSdl(false, namespace~suffix, [Tag(namespace~tagName)]); 1518 } 1519 testParseSdl(false, "tag1{"~namespace~"}", [Tag("tag1"), Tag(namespace~tagName)]); 1520 } 1521 if(isProposedSdl) break; 1522 isProposedSdl = true; 1523 } 1524 useStrict(); 1525 1526 1527 testParseSdl(false, "a:a", [Tag("a:a")]); 1528 testParseSdl(false, "ab:a", [Tag("ab:a")]); 1529 1530 testParseSdl(false, "a:ab", [Tag("a:ab")]); 1531 testParseSdl(false, "ab:ab", [Tag("ab:ab")]); 1532 1533 testParseSdl(false, "html:table", [Tag("html:table")]); 1534 1535 testParseSdl(false, ";", [Tag("content")]); 1536 testParseSdl(false, "myid;", [Tag("myid")]); 1537 testParseSdl(false, "myid; ", [Tag("myid")]); 1538 testParseSdl(false, "myid #comment", [Tag("myid")]); 1539 testParseSdl(false, "myid # comment \n", [Tag("myid")]); 1540 testParseSdl(false, "myid -- comment \n # more comments\n", [Tag("myid")]); 1541 1542 1543 testParseSdl(false, "myid /* multiline comment */", [Tag("myid")]); 1544 testParseSdl(false, "myid /* multiline comment */ ", [Tag("myid")]); 1545 testParseSdl(false, "myid /* multiline comment */\n", [Tag("myid")]); 1546 testParseSdl(false, "myid /* multiline comment \n\n */", [Tag("myid")]); 1547 testParseSdl(false, "myid /* multiline comment **/ \"value\"", [Tag("myid", `"value"`)]); 1548 testParseSdl(false, "myid /* multiline comment \n\n */another-id", [Tag("myid"), Tag("another-id")]); 1549 testParseSdl(false, "myid /* multiline comment */ \"value\"", [Tag("myid", `"value"`)]); 1550 testParseSdl(false, "myid /* multiline comment \n */ \"value\"", [Tag("myid"), Tag("content", `"value"`)]); 1551 testInvalidSdl(false, "myid /* multiline comment \n */ { \n }"); 1552 useProposed(); 1553 testParseSdl(false, "myid /* multiline comment */ { \n }", [Tag("myid")]); 1554 testParseSdl(false, "myid /* multiline comment \n */ \"value\"", [Tag("myid"), Tag(null, `"value"`)]); 1555 useStrict(); 1556 1557 1558 testParseSdl(false, "tag1\ntag2", [Tag("tag1"), Tag("tag2")]); 1559 testParseSdl(false, "tag1;tag2\ntag3", [Tag("tag1"), Tag("tag2"), Tag("tag3")]); 1560 1561 testInvalidSdl(false, "myid {"); 1562 testInvalidSdl(false, "myid {\n\n"); 1563 1564 testInvalidSdl(false, "{}"); 1565 1566 testParseSdl(false, "tag1{}", [Tag("tag1")]); 1567 testParseSdl(false, "tag1{}tag2", [Tag("tag1"), Tag("tag2")]); 1568 testParseSdl(false, "tag1{}\ntag2", [Tag("tag1"), Tag("tag2")]); 1569 1570 testParseSdl(false, "tag1{tag1.1}tag2", [Tag("tag1"), Tag("tag1.1"), Tag("tag2")]); 1571 1572 // 1573 // Handling the backslash '\' character 1574 // 1575 testInvalidSdl(false, "\\"); // slash must in the context of a tag 1576 testInvalidSdl(false, `tag \ x`); 1577 1578 testParseSdl(false, "tag\\", [Tag("tag")]); // Make sure this is valid sdl 1579 testParseSdl(false, "tag \\ \n \\ \n \"hello\"", [Tag("tag", `"hello"`)]); 1580 1581 // 1582 // Test the keywords (white box tests trying to attain full code coverage) 1583 // 1584 auto keywords = ["null", "true", "false", "on", "off"]; 1585 1586 foreach(keyword; keywords) { 1587 testParseSdl(false, keyword, [Tag("content", keyword)]); 1588 } 1589 1590 namespaces = ["", "n:", "namespace:"]; 1591 foreach(namespace; namespaces) { 1592 sdlBuffer[0..namespace.length] = namespace; 1593 auto afterTagName = namespace.length + 4; 1594 sdlBuffer[namespace.length..afterTagName] = "tag "; 1595 string expectedTagName = namespace~"tag"; 1596 1597 foreach(keyword; keywords) { 1598 for(auto cutoff = 1; cutoff < keyword.length; cutoff++) { 1599 sdlBuffer[afterTagName..afterTagName+cutoff] = keyword[0..cutoff]; 1600 //testInvalidSdl(false, sdlBuffer[0..afterTagName+cutoff]); 1601 testParseSdl(false, sdlBuffer[0..afterTagName+cutoff], [Tag(expectedTagName, sdlBuffer[afterTagName..afterTagName+cutoff])]); 1602 } 1603 } 1604 auto suffixes = [";", " \t;", "\n", "{}", " \t {\n }"]; 1605 foreach(keyword; keywords) { 1606 auto limit = afterTagName+keyword.length; 1607 1608 sdlBuffer[afterTagName..limit] = keyword; 1609 testParseSdl(false, sdlBuffer[0..limit], [Tag(expectedTagName, keyword)]); 1610 1611 foreach(suffix; suffixes) { 1612 sdlBuffer[limit..limit+suffix.length] = suffix; 1613 testParseSdl(false, sdlBuffer[0..limit+suffix.length], [Tag(expectedTagName, keyword)]); 1614 } 1615 } 1616 foreach(keyword; keywords) { 1617 1618 foreach(attrNamespace; namespaces) { 1619 1620 for(auto cutoff = 1; cutoff <= keyword.length; cutoff++) { 1621 auto limit = afterTagName + attrNamespace.length; 1622 sdlBuffer[afterTagName..limit] = attrNamespace; 1623 limit += cutoff; 1624 sdlBuffer[limit - cutoff..limit] = keyword[0..cutoff]; 1625 sdlBuffer[limit..limit+8] = `="value"`; 1626 testParseSdl(false, sdlBuffer[0..limit+8], [Tag(expectedTagName, format(`%s%s="value"`, attrNamespace, keyword[0..cutoff]))]); 1627 1628 foreach(otherKeyword; keywords) { 1629 sdlBuffer[limit+1..limit+1+otherKeyword.length] = otherKeyword; 1630 testParseSdl(false, sdlBuffer[0..limit+1+otherKeyword.length], 1631 [Tag(expectedTagName, format("%s%s=%s", attrNamespace, keyword[0..cutoff], otherKeyword))]); 1632 } 1633 } 1634 1635 } 1636 1637 } 1638 } 1639 1640 1641 1642 1643 // 1644 // String Literals 1645 // 1646 testParseSdl(false, `a "apple"`, [Tag("a", `"apple"`)]); 1647 testParseSdl(false, "a \"pear\"\n", [Tag("a", `"pear"`)]); 1648 testParseSdl(false, "a \"left\"\nb \"right\"", [Tag("a", `"left"`), Tag("b", `"right"`)]); 1649 testParseSdl(false, "a \"cat\"\"dog\"\"bear\"\n", [Tag("a", `"cat"`, `"dog"`, `"bear"`)]); 1650 testParseSdl(false, "a \"tree\";b \"truck\"\n", [Tag("a", `"tree"`), Tag("b", `"truck"`)]); 1651 1652 1653 // 1654 // Unquoted Strings 1655 // 1656 testParseSdl(false, "tag string", [Tag("tag", "string")]); 1657 testParseSdl(false, "tag attr=string", [Tag("tag", "attr=string")]); 1658 1659 1660 1661 // 1662 // Attributes 1663 // 1664 testParseSdl(false, "tag attr=null", [Tag("tag", "attr=null")]); 1665 testParseSdl(false, "tag \"val\" attr=null", [Tag("tag", `"val"`, "attr=null")]); 1666 1667 auto mixedValuesAndAttributesTests = 1668 [ 1669 SdlTest("tag attr=null \"val\"", [Tag("tag", "attr=null", `"val"`)] ) 1670 ]; 1671 1672 foreach(test; mixedValuesAndAttributesTests) { 1673 testInvalidSdl(test.copySdl, test.sdlText, SdlErrorType.mixedValuesAndAttributes); 1674 } 1675 useProposed(); 1676 foreach(test; mixedValuesAndAttributesTests) { 1677 testParseSdl(test.copySdl, test.sdlText, test.expectedTags); 1678 } 1679 useStrict(); 1680 1681 foreach(suffix; validCharactersAfterTag) { 1682 testParseSdl(false, "tag attr=null"~suffix, [Tag("tag", "attr=null")]); 1683 testParseSdl(false, "tag attr=true"~suffix, [Tag("tag", "attr=true")]); 1684 testParseSdl(false, "tag attr=unquoted"~suffix, [Tag("tag", "attr=unquoted")]); 1685 testParseSdl(false, "tag attr=\"quoted\""~suffix, [Tag("tag", "attr=\"quoted\"")]); 1686 testParseSdl(false, "tag attr=1234"~suffix, [Tag("tag", "attr=1234")]); 1687 1688 testParseSdl(false, "attr=null"~suffix, [Tag("content", "attr=null")]); 1689 testParseSdl(false, "attr=true"~suffix, [Tag("content", "attr=true")]); 1690 testParseSdl(false, "attr=unquoted"~suffix, [Tag("content", "attr=unquoted")]); 1691 testParseSdl(false, "attr=\"quoted\""~suffix, [Tag("content", "attr=\"quoted\"")]); 1692 testParseSdl(false, "attr=1234"~suffix, [Tag("content", "attr=1234")]); 1693 } 1694 1695 // 1696 // Test parsing numbers without extracting them 1697 // 1698 enum numberPostfixes = ["", "l", "L", "f", "F", "d", "D", "bd", "BD"]; 1699 { 1700 enum sdlPostfixes = ["", " ", ";", "\n"]; 1701 1702 auto numbers = ["0", "12", "9876", "5432", /*".1",*/ "0.1", "12.4", /*"1.",*/ "8.04", "123.l"]; 1703 1704 1705 for(size_t negative = 0; negative < 2; negative++) { 1706 string prefix = negative ? "-" : ""; 1707 1708 foreach(postfix; numberPostfixes) { 1709 foreach(number; numbers) { 1710 1711 auto testNumber = prefix~number~postfix; 1712 1713 if(postfix.length) { 1714 useProposed(); 1715 //testInvalidSdl(false, "tag "~testNumber); 1716 useStrict(); 1717 } 1718 //testInvalidSdl(false, "tag "~testNumber~"="); 1719 1720 foreach(sdlPostfix; sdlPostfixes) { 1721 testParseSdl(false, "tag "~testNumber~sdlPostfix, [Tag("tag", testNumber)]); 1722 } 1723 } 1724 } 1725 1726 1727 } 1728 } 1729 1730 // 1731 // Test parsing numbers and extracting them 1732 // 1733 { 1734 for(size_t negative = 0; negative < 2; negative++) { 1735 string prefix = negative ? "-" : ""; 1736 1737 foreach(postfix; numberPostfixes) { 1738 1739 void testNumber(Types...)(ulong expectedValue) { 1740 long expectedSignedValue = negative ? -1 * (cast(long)expectedValue) : cast(long)expectedValue; 1741 1742 foreach(Type; Types) { 1743 if(negative && isUnsigned!Type) continue; 1744 if(expectedSignedValue > Type.max) continue; 1745 static if( is(Type == float) || is(Type == double) || is(Type == real)) { 1746 if(expectedSignedValue < Type.min_normal) continue; 1747 } else { 1748 if(expectedSignedValue < Type.min) continue; 1749 } 1750 1751 debug writefln("[DEBUG] testing %s on %s", typeid(Type), parsedTag.values.data[0]); 1752 Type t; 1753 parsedTag.getOneValue(t); 1754 assert(t == cast(Type) expectedSignedValue, format("Expected (%s) %s but got %s", typeid(Type), expectedSignedValue, t)); 1755 } 1756 } 1757 void testDecimalNumber(Types...)(real expectedValue) { 1758 foreach(Type; Types) { 1759 if(negative && isUnsigned!Type) continue; 1760 if(expectedValue > Type.max) continue; 1761 static if( is(Type == float) || is(Type == double) || is(Type == real)) { 1762 if(expectedValue < Type.min_normal) continue; 1763 } else { 1764 if(expectedValue < Type.min) continue; 1765 } 1766 1767 debug writefln("[DEBUG] testing %s on %s", typeid(Type), parsedTag.values.data[0]); 1768 Type t; 1769 parsedTag.getOneValue(t); 1770 assert(t - cast(Type) expectedValue < .01, format("Expected (%s) %s but got %s", typeid(Type), cast(Type)expectedValue, t)); 1771 } 1772 } 1773 1774 alias testNumber!(byte,ubyte,short,ushort,int,uint,long,ulong,float,double,real) testNumberOnAllTypes; 1775 alias testDecimalNumber!(float,double,real) testDecimalNumberOnAllTypes; 1776 1777 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"0"~postfix); 1778 testNumberOnAllTypes(0); 1779 1780 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"1"~postfix); 1781 testNumberOnAllTypes(1); 1782 1783 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"12"~postfix); 1784 testNumberOnAllTypes(12); 1785 1786 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"9987"~postfix); 1787 testNumberOnAllTypes(9987); 1788 1789 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"0.0"~postfix); 1790 testDecimalNumberOnAllTypes(0.0); 1791 1792 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~".1"~postfix); 1793 testDecimalNumberOnAllTypes(0.1); 1794 1795 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~".000001"~postfix); 1796 testDecimalNumberOnAllTypes(0.000001); 1797 1798 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"100384.999"~postfix); 1799 testDecimalNumberOnAllTypes(100384.999); 1800 1801 parseOneSdlTag(&parsedTag, cast(char[])"tag "~prefix~"3.14159265"~postfix); 1802 testDecimalNumberOnAllTypes(3.14159265); 1803 } 1804 } 1805 1806 } 1807 1808 1809 // 1810 // Children 1811 // 1812 testInvalidSdl(false, "{}"); // no line can start with a curly brace 1813 1814 auto braceAfterNewlineTests = 1815 [ 1816 SdlTest("tag\n{ child\n}", [Tag("tag"), Tag("child")]), 1817 SdlTest("colors \"hot\" \n{ yellow\n}", [Tag("colors", `"hot"`), Tag("yellow")]) 1818 ]; 1819 1820 foreach(test; braceAfterNewlineTests) { 1821 testInvalidSdl(test.copySdl, test.sdlText, SdlErrorType.braceAfterNewline); 1822 } 1823 useProposed(); 1824 foreach(test; braceAfterNewlineTests) { 1825 testParseSdl(test.copySdl, test.sdlText, test.expectedTags); 1826 } 1827 useStrict(); 1828 1829 // 1830 // Odd corner cases 1831 // 1832 testParseSdl(false, "tag null;", [Tag("tag", "null")]); 1833 testParseSdl(false, "tag null{}", [Tag("tag", "null")]); 1834 testParseSdl(false, "tag true;", [Tag("tag", "null")]); 1835 testParseSdl(false, "tag true{}", [Tag("tag", "null")]); 1836 testParseSdl(false, "tag false;", [Tag("tag", "null")]); 1837 testParseSdl(false, "tag false{}", [Tag("tag", "null")]); 1838 1839 testParseSdl(false, "namespace:true", [Tag("namespace:true")]); 1840 testParseSdl(false, ":true", [Tag("true")]); 1841 testParseSdl(false, "true", [Tag("content", "true")]); 1842 testParseSdl(false, "tag\\", [Tag("tag")]); 1843 testParseSdl(false, "tag/*comment*/null", [Tag("tag", "null")]); 1844 testParseSdl(false, "crazy--tag", [Tag("crazy--tag")]); 1845 testParseSdl(false, "tag# comment", [Tag("tag")]); 1846 testParseSdl(false, "tag// comment", [Tag("tag")]); 1847 testParseSdl(false, "a=what", [Tag("content", "a=what")]); 1848 1849 testParseSdl(false, "tag {\nattr=value//\n}", [Tag("tag"), Tag("content", "attr=value")]); 1850 testParseSdl(false, "tag {\nattr=value}", [Tag("tag"), Tag("content", "attr=value")]); 1851 1852 1853 testInvalidSdl(false, "tag what/huh"); 1854 testInvalidSdl(false, `tag"value"`); 1855 testInvalidSdl(false, "attr:123"); 1856 testInvalidSdl(false, "attr:\"what\""); 1857 testInvalidSdl(false, "attr:=what"); 1858 testInvalidSdl(false, "attr:=null"); 1859 testInvalidSdl(false, "attr:=345"); 1860 testInvalidSdl(false, "name:tag:weird"); 1861 testInvalidSdl(false, "tag namespace:what"); 1862 testInvalidSdl(false, "tag namespace:\"value\""); 1863 testInvalidSdl(false, "tag namespace:null"); 1864 testInvalidSdl(false, "tag namespace:true"); 1865 testInvalidSdl(false, "tag^"); 1866 testInvalidSdl(false, "tag<"); 1867 testInvalidSdl(false, "tag>"); 1868 1869 1870 1871 1872 1873 // TODO: testing using all keywords as namespaces true:id, etc. 1874 testParseSdl(false, "tag null:null=\"value\";", [Tag("tag", "null:null=\"value\"")]); 1875 testParseSdl(false, "null", [Tag("content", "null")]); 1876 1877 1878 1879 // 1880 // Full Parses 1881 // 1882 testParseSdl(false, ` 1883 name "joe" 1884 children { 1885 name "jim" 1886 }`, [Tag("name", `"joe"`), Tag("children"), Tag("name", `"jim"`)]); 1887 1888 testParseSdl(false, ` 1889 parent name="jim" { 1890 child "hasToys" name="joey" { 1891 # just a comment here for now 1892 } 1893 }`, [Tag("parent", "name=\"jim\""), Tag("child", "name=\"joey\"", `"hasToys"`)]); 1894 1895 1896 testParseSdl(false,`td 34 1897 html:td "Puggy" 1898 `, [Tag("td", `34`), 1899 Tag("html:td", `"Puggy"`)]); 1900 1901 1902 testParseSdl(false,`html:table { 1903 html:tr { 1904 html:th "Name" 1905 html:th "Age" 1906 html:th "Pet" 1907 } 1908 html:tr { 1909 html:td "Brian" 1910 html:td 34 1911 html:td "Puggy" 1912 html:td null 1913 html:td false 1914 } 1915 tr { 1916 td "Jackie" 1917 td 27 1918 td null 1919 } 1920 }`, [Tag("html:table"), 1921 Tag("html:tr"), 1922 Tag("html:th", `"Name"`), 1923 Tag("html:th", `"Age"`), 1924 Tag("html:th", `"Pet"`), 1925 Tag("html:tr"), 1926 Tag("html:td", `"Brian"`), 1927 Tag("html:td", `34`), 1928 Tag("html:td", `"Puggy"`), 1929 Tag("html:td", `null`), 1930 Tag("html:td", `false`), 1931 Tag("tr"), 1932 Tag("td", `"Jackie"`), 1933 Tag("td", `27`), 1934 Tag("td", `null`)]); 1935 } 1936 1937 /// Assists in walking an SDL tree which supports the StAX method of parsing. 1938 /// Examples: 1939 /// --- 1940 /// Tag tag; 1941 /// SdlWalker walker = SdlWalker(&tag, sdl); 1942 /// while(walker.pop()) { 1943 /// // use tag to process the current tag 1944 /// 1945 /// auto depth = tag.childrenDepth(); 1946 /// while(walker.pop(depth)) { 1947 /// // process tag again as a child tag 1948 /// } 1949 /// 1950 /// } 1951 /// --- 1952 struct SdlWalker 1953 { 1954 /// A pointer to the tag structure that will be populated after parsing every tag. 1955 Tag* tag; 1956 1957 // The sdl text that has yet to be parsed. 1958 private char[] sdl; 1959 1960 // Used for when a child walker has popped a parent tag 1961 bool tagAlreadyPopped; 1962 1963 this(Tag* tag, char[] sdl) { 1964 this.tag = tag; 1965 this.sdl = sdl; 1966 } 1967 1968 /// Parses the next tag at the given depth. 1969 /// Returns: true if it parsed a tag at the given depth and false if there are no more 1970 /// tags at the given depth. If depth is 0 it means the sdl has been fully parsed. 1971 /// Throws: Exception if the current tag has children and they were not parsed 1972 /// and allowSkipChildren is set to false. 1973 bool pop(size_t depth = 0, bool allowSkipChildren = false) { 1974 if(tagAlreadyPopped) { 1975 if(depth < tag.depth) throw new Exception("possible code bug here?"); 1976 if(tag.depth == depth) { 1977 tagAlreadyPopped = false; 1978 return true; 1979 } 1980 } 1981 1982 while(true) { 1983 size_t previousDepth; 1984 const(char)[] previousName; 1985 1986 if(!allowSkipChildren) { 1987 previousDepth = tag.depth; 1988 previousName = tag.name; 1989 } 1990 1991 if(!parseSdlTag(this.tag, &sdl)) { 1992 assert(tag.depth == 0, format("code bug: parseSdlTag returned end of input but tag.depth was %s (not 0)", tag.depth)); 1993 return false; 1994 } 1995 1996 if(this.tag.depth == depth) return true; 1997 1998 // Check if it is the end of this set of children 1999 if(this.tag.depth < depth) { 2000 tagAlreadyPopped = true; 2001 return false; 2002 } 2003 2004 if(!allowSkipChildren) throw new Exception(format("forgot to call children on tag '%s' at depth %s", previousName, previousDepth)); 2005 } 2006 } 2007 2008 public size_t childrenDepth() { return tag.depth + 1; } 2009 } 2010 2011 version(unittest) 2012 { 2013 struct Dependency { 2014 string name; 2015 string version_; 2016 } 2017 // Example of parsing a configuration file 2018 struct Package { 2019 const(char)[] name; 2020 const(char)[] description; 2021 2022 const(char)[][] authors; 2023 auto dependencies = appender!(Dependency[])(); 2024 auto subPackages = appender!(Package[])(); 2025 2026 void reset() { 2027 name = null; 2028 description = null; 2029 authors = null; 2030 dependencies.clear(); 2031 subPackages.clear(); 2032 } 2033 bool opEquals(ref const Package p) { 2034 return 2035 name == p.name && 2036 description == p.description && 2037 authors == p.authors && 2038 dependencies.data == p.dependencies.data && 2039 subPackages.data == p.subPackages.data; 2040 } 2041 void parseSdlPackage(bool copySdl, string sdlText) { 2042 parseSdlPackage(setupSdlText(sdlText, copySdl)); 2043 } 2044 void parseSdlPackage(char[] sdlText) { 2045 Tag tag; 2046 auto sdl = SdlWalker(&tag, sdlText); 2047 while(sdl.pop()) { 2048 2049 debug writefln("[sdl] (depth %s) tag '%s'%s", tag.depth, tag.name, 2050 tag.hasOpenBrace ? "(has children)" : ""); 2051 2052 if(tag.name == "name") { 2053 2054 tag.enforceNoAttributes(); 2055 tag.enforceNoChildren(); 2056 tag.getOneValue(this.name); 2057 2058 } else if(tag.name == "description") { 2059 2060 tag.enforceNoAttributes(); 2061 tag.enforceNoChildren(); 2062 tag.getOneValue(this.description); 2063 2064 } else if(tag.name == "authors") { 2065 2066 if(this.authors !is null) tag.throwIsDuplicate(); 2067 tag.enforceNoAttributes(); 2068 tag.enforceNoChildren(); 2069 tag.getValues(this.authors); 2070 2071 } else tag.throwIsUnknown(); 2072 2073 } 2074 2075 } 2076 } 2077 } 2078 2079 2080 unittest 2081 { 2082 mixin(scopedTest!"SdlWalker"); 2083 2084 void testPackage(bool copySdl, string sdlText, ref Package expectedPackage) 2085 { 2086 Package parsedPackage; 2087 2088 parsedPackage.parseSdlPackage(copySdl, sdlText); 2089 2090 if(expectedPackage != parsedPackage) { 2091 writefln("Expected package: %s", expectedPackage); 2092 writefln(" but got package: %s", parsedPackage); 2093 assert(0); 2094 } 2095 } 2096 2097 string sdl; 2098 Package expectedPackage; 2099 2100 expectedPackage = Package("my-package", "an example sdl package", 2101 ["Jonathan", "David", "Amy"]); 2102 2103 testPackage(false, ` 2104 name "my-package" 2105 description "an example sdl package" 2106 authors "Jonathan" "David" "Amy" 2107 `, expectedPackage); 2108 } 2109 2110 unittest 2111 { 2112 mixin(scopedTest!"SdlWalkerOnPerson"); 2113 2114 struct Person { 2115 const(char)[] name; 2116 ushort age; 2117 const(char)[][] nicknames; 2118 Person[] children; 2119 void reset() { 2120 name = null; 2121 age = 0; 2122 nicknames = null; 2123 children.length = 0; 2124 } 2125 bool opEquals(ref const Person p) { 2126 return 2127 name == p.name && 2128 age == p.age && 2129 nicknames == p.nicknames && 2130 children == p.children; 2131 } 2132 string toString() { 2133 return format("Person(\"%s\", %s, %s, %s)", name, age, nicknames, children); 2134 } 2135 void validate() { 2136 if(name is null) throw new Exception("person is missing the 'name' tag"); 2137 if(age == 0) throw new Exception("person is missing the 'age' tag"); 2138 } 2139 void parseFromSdl(ref SdlWalker walker) { 2140 auto tag = walker.tag; 2141 2142 tag.enforceNoValues(); 2143 tag.enforceNoAttributes(); 2144 2145 reset(); 2146 2147 auto childBuilder = appender!(Person[])(); 2148 2149 auto depth = walker.childrenDepth(); 2150 while(walker.pop(depth)) { 2151 2152 //writefln("[sdl] (depth %s) tag '%s'%s", tag.depth, tag.name, 2153 //tag.hasOpenBrace ? "(has children)" : ""); 2154 //stdout.flush(); 2155 2156 if(tag.name == "name") { 2157 2158 tag.enforceNoAttributes(); 2159 tag.enforceNoChildren(); 2160 tag.getOneValue(name); 2161 2162 } else if(tag.name == "age") { 2163 2164 tag.enforceNoAttributes(); 2165 tag.enforceNoChildren(); 2166 tag.getOneValue(age); 2167 2168 } else if(tag.name == "nicknames") { 2169 2170 tag.enforceNoAttributes(); 2171 tag.enforceNoChildren(); 2172 tag.getValues(nicknames); 2173 2174 } else if(tag.name == "child") { 2175 2176 Person child = Person(); 2177 child.parseFromSdl(walker); 2178 childBuilder.put(child); 2179 2180 } else tag.throwIsUnknown(); 2181 2182 } 2183 2184 this.children = childBuilder.data.dup; 2185 childBuilder.clear(); 2186 validate(); 2187 } 2188 } 2189 2190 Appender!(Person[]) parsePeople(char[] sdl) { 2191 auto people = appender!(Person[])(); 2192 Person person; 2193 2194 Tag tag; 2195 auto walker = SdlWalker(&tag, sdl); 2196 while(walker.pop()) { 2197 if(tag.name == "person") { 2198 2199 person.parseFromSdl(walker); 2200 people.put(person); 2201 2202 } else tag.throwIsUnknown(); 2203 } 2204 2205 return people; 2206 } 2207 2208 void testParsePeople(bool copySdl, string sdlText, Person[] expectedPeople...) 2209 { 2210 Appender!(Person[]) parsedPeople; 2211 try { 2212 2213 parsedPeople = parsePeople(setupSdlText(sdlText, copySdl)); 2214 2215 } catch(Exception e) { 2216 writefln("the following sdl threw an unexpected exception: %s", sdlText); 2217 writeln(e); 2218 assert(0); 2219 } 2220 2221 if(expectedPeople.length != parsedPeople.data.length) { 2222 writefln("Expected: %s", expectedPeople); 2223 writefln(" but got: %s", parsedPeople.data); 2224 assert(0); 2225 } 2226 for(auto i = 0; i < expectedPeople.length; i++) { 2227 Person expectedPerson = expectedPeople[i]; 2228 if(expectedPerson != parsedPeople.data[i]) { 2229 writefln("Expected: %s", expectedPeople); 2230 writefln(" but got: %s", parsedPeople.data); 2231 assert(0); 2232 } 2233 } 2234 2235 } 2236 2237 testParsePeople(false, ` 2238 person { 2239 name "Robert" 2240 age 29 2241 nicknames "Bob" "Bobby" 2242 child { 2243 name "Jack" 2244 age 6 2245 nicknames "Little Jack" 2246 } 2247 child { 2248 name "Sally" 2249 age 8 2250 } 2251 }`, Person("Robert", 29, ["Bob", "Bobby"], [Person("Jack", 6, ["Little Jack"]),Person("Sally", 8)])); 2252 2253 } 2254