// Patch summary (review notes only; the hunks below are unchanged)
// . document.cpp: the catch around scanner.Scan() is widened from
//   UnknownToken to Exception, and a getchar() call is added after it.
// . exceptions.h: adds IllegalTabInScalar, derived from Exception.
// . scanner.cpp: adds Eat(n) and Peek(n); EatLineBreak() now calls Eat(1)
//   before resetting m_column; EatDocumentStart()/EatDocumentEnd() are
//   removed and their call sites use Eat(3).
// . scanner.cpp: IsWhitespaceToBeEaten/IsLineBreak/IsBlank take the
//   character to test as a parameter instead of calling INPUT.peek();
//   IsValue and IsPlainScalar are rewritten on top of Peek(2).
// . scanner.cpp: the PlainScalarToken scan loop now collects characters in a
//   local string and may throw IllegalScalar or IllegalTabInScalar; in the
//   visible hunk that string is not copied onto the token.
// . scanner.cpp: Scan() drains m_tokens after every ScanNextToken() call,
//   printing typeid(*pToken).name() and deleting each token.
// . scanner.h: declarations updated to match (Eat defaults n to 1).
// . token.h: Token gains a public virtual destructor.
// NOTE(review): some spans look truncated, e.g. the bare "#include" and
//   "for(int i=0;i=0;i--)"; presumably text between angle brackets was lost
//   when this patch was captured -- confirm against the original commit
//   before applying.
diff --git a/document.cpp b/document.cpp index 8ba707d34c..2ae9cc9712 100644 --- a/document.cpp +++ b/document.cpp @@ -36,8 +36,9 @@ namespace YAML try { scanner.Scan(); - } catch(const UnknownToken& e) { + } catch(const Exception& e) { } + getchar(); // if(!scanner) // return; diff --git a/exceptions.h b/exceptions.h index 947b924e8d..2b4e689ebf 100644 --- a/exceptions.h +++ b/exceptions.h @@ -11,4 +11,5 @@ namespace YAML class IllegalMapKey: public Exception {}; class IllegalMapValue: public Exception {}; class IllegalScalar: public Exception {}; + class IllegalTabInScalar: public Exception {}; } diff --git a/scanner.cpp b/scanner.cpp index 5320b29178..80227423bc 100644 --- a/scanner.cpp +++ b/scanner.cpp @@ -1,6 +1,7 @@ #include "scanner.h" #include "token.h" #include "exceptions.h" +#include namespace YAML { @@ -32,30 +33,42 @@ namespace YAML return INPUT.get(); } + // Eat + // . Eats 'n' characters and updates our position. + void Scanner::Eat(int n) + { + for(int i=0;i=0;i--) + INPUT.putback(ret[i]); + + return ret; + } + // GetLineBreak // . Eats with no checking void Scanner::EatLineBreak() { + Eat(1); m_column = 0; - INPUT.get(); - } - - // EatDocumentStart - // . Eats with no checking - void Scanner::EatDocumentStart() - { - INPUT.get(); - INPUT.get(); - INPUT.get(); - } - - // EatDocumentEnd - // . Eats with no checking - void Scanner::EatDocumentEnd() - { - INPUT.get(); - INPUT.get(); - INPUT.get(); } // IsWhitespaceToBeEaten @@ -65,10 +78,8 @@ namespace YAML // a. In the flow context // b. 
In the block context but not where a simple key could be allowed // (i.e., not at the beginning of a line, or following '-', '?', or ':') - bool Scanner::IsWhitespaceToBeEaten() + bool Scanner::IsWhitespaceToBeEaten(char ch) { - char ch = INPUT.peek(); - if(ch == ' ') return true; @@ -79,17 +90,15 @@ namespace YAML } // IsLineBreak - bool Scanner::IsLineBreak() + bool Scanner::IsLineBreak(char ch) { - char ch = INPUT.peek(); return ch == '\n'; // TODO: More types of line breaks } // IsBlank - bool Scanner::IsBlank() + bool Scanner::IsBlank(char ch) { - char ch = INPUT.peek(); - return IsLineBreak() || ch == ' ' || ch == '\t' || ch == EOF; + return IsLineBreak(ch) || ch == ' ' || ch == '\t' || ch == EOF; } // IsDocumentStart @@ -99,34 +108,8 @@ namespace YAML if(m_column != 0) return false; - // then needs '---' - for(int i=0;i<3;i++) { - if(INPUT.peek() != '-') { - // first put 'em back - for(int j=0;j 0); } // IsValue bool Scanner::IsValue() { - if(INPUT.peek() != Keys::Value) - return false; - - INPUT.get(); - - // then needs a blank character (or eof), if we're in block context - if(m_flowLevel == 0 && !IsBlank()) { - INPUT.putback(Keys::BlockEntry); - return false; - } - - INPUT.putback(Keys::BlockEntry); - return true; + std::string next = Peek(2); + return next[0] == Keys::Value && (IsBlank(next[1]) || m_flowLevel > 0); } // IsPlainScalar @@ -228,34 +152,25 @@ namespace YAML // . In the flow context ? : are illegal and - must not be followed with a space. bool Scanner::IsPlainScalar() { - if(IsBlank()) + std::string next = Peek(2); + + if(IsBlank(next[0])) return false; // never characters - std::string never = ",[]{}#&*!|>\'\"%@`"; - for(unsigned i=0;i\'\"%@`").find(next[0]) != std::string::npos) + return false; // specific block/flow characters if(m_flowLevel == 0) { - if(INPUT.peek() == '-' || INPUT.peek() == '?' 
|| INPUT.peek() == ':') { - char ch = INPUT.get(); - if(IsBlank()) { - INPUT.putback(ch); - return false; - } - } - } else { - if(INPUT.peek() == '?' || INPUT.peek() == ':') + if((next[0] == '-' || next[0] == '?' || next[0] == ':') && IsBlank(next[1])) + return false; + } else { + if(next[0] == '?' || next[0] == ':') + return false; + + if(next[0] == '-' && IsBlank(next[1])) return false; - if(INPUT.peek() == '-') { - INPUT.get(); - if(IsBlank()) { - INPUT.putback('-'); - return false; - } - } } return true; @@ -311,8 +226,8 @@ namespace YAML m_simpleKeyAllowed = false; - // eat it - EatDocumentStart(); + // eat + Eat(3); return pToken; } @@ -325,8 +240,8 @@ namespace YAML m_simpleKeyAllowed = false; - // eat it - EatDocumentEnd(); + // eat + Eat(3); return pToken; } @@ -419,7 +334,7 @@ namespace YAML m_simpleKeyAllowed = true; // eat - INPUT.get(); + Eat(1); return pToken; } @@ -443,7 +358,7 @@ namespace YAML m_simpleKeyAllowed = false; // eat - INPUT.get(); + Eat(1); return pToken; } @@ -453,61 +368,90 @@ namespace YAML // TODO: Is it a simple key? if(false) { } else { - // If not, ... - // are we in block context? - if(m_flowLevel == 0) { - if(!m_simpleKeyAllowed) - throw IllegalMapValue(); - - PushIndentTo(m_column, false); - } - } - + // If not, ... + // are we in block context? 
+ if(m_flowLevel == 0) { + if(!m_simpleKeyAllowed) + throw IllegalMapValue(); + + PushIndentTo(m_column, false); + } + } + // can only put a simple key here if we're in block context if(m_flowLevel == 0) m_simpleKeyAllowed = true; else - m_simpleKeyAllowed = false; - - // eat - INPUT.get(); + m_simpleKeyAllowed = false; + + // eat + Eat(1); return pToken; } // PlainScalarToken template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) - { - // TODO: "save simple key" - - m_simpleKeyAllowed = false; - - // now eat and store the scalar - while(1) { - // doc start/end tokens - if(IsDocumentStart() || IsDocumentEnd()) - break; - - // comment - if(INPUT.peek() == Keys::Comment) - break; - - // first eat non-blanks - while(!IsBlank()) { - // illegal colon in flow context - if(m_flowLevel > 0 && INPUT.peek() == ':') { - INPUT.get(); - if(!IsBlank()) { - INPUT.putback(':'); - throw IllegalScalar(); - } - INPUT.putback(':'); - } - - // characters that might end the scalar - // TODO: scanner.c line 3434 - } - } - + { + // TODO: "save simple key" + + m_simpleKeyAllowed = false; + + // now eat and store the scalar + std::string scalar; + bool leadingBlanks = true; + + while(INPUT) { + // doc start/end tokens + if(IsDocumentStart() || IsDocumentEnd()) + break; + + // comment + if(INPUT.peek() == Keys::Comment) + break; + + // first eat non-blanks + while(INPUT && !IsBlank(INPUT.peek())) { + std::string next = Peek(2); + + // illegal colon in flow context + if(m_flowLevel > 0 && next[0] == ':') { + if(!IsBlank(next[1])) + throw IllegalScalar(); + } + + // characters that might end the scalar + if(next[0] == ':' && IsBlank(next[1])) + break; + if(m_flowLevel > 0 && std::string(",:?[]{}").find(next[0]) != std::string::npos) + break; + + scalar += GetChar(); + } + + // now eat blanks + while(IsBlank(INPUT.peek()) /* || IsBreak(INPUT.peek()) */) { + if(IsBlank(INPUT.peek())) { + if(leadingBlanks && m_column <= m_indents.top()) + throw IllegalTabInScalar(); + + // 
TODO: Store some blanks? + Eat(1); + } else { + Eat(1); + } + } + + // TODO: join whitespace + + // and finally break if we're below the indentation level + if(m_flowLevel == 0 && m_column <= m_indents.top()) + break; + } + + // now modify our token + if(leadingBlanks) + m_simpleKeyAllowed = true; + return pToken; } @@ -588,18 +532,18 @@ namespace YAML { while(1) { // first eat whitespace - while(IsWhitespaceToBeEaten()) - INPUT.get(); + while(IsWhitespaceToBeEaten(INPUT.peek())) + Eat(1); // then eat a comment if(INPUT.peek() == Keys::Comment) { // eat until line break - while(INPUT && !IsLineBreak()) - INPUT.get(); + while(INPUT && !IsLineBreak(INPUT.peek())) + Eat(1); } // if it's NOT a line break, then we're done! - if(!IsLineBreak()) + if(!IsLineBreak(INPUT.peek())) break; // otherwise, let's eat the line break and keep going @@ -651,7 +595,15 @@ namespace YAML // temporary function for testing void Scanner::Scan() { - while(INPUT) + while(INPUT) { ScanNextToken(); + + while(!m_tokens.empty()) { + Token *pToken = m_tokens.front(); + m_tokens.pop(); + std::cout << typeid(*pToken).name() << std::endl; + delete pToken; + } + } } } diff --git a/scanner.h b/scanner.h index 9d9e282e10..1bc9e1a0ce 100644 --- a/scanner.h +++ b/scanner.h @@ -43,13 +43,14 @@ namespace YAML private: char GetChar(); - void EatLineBreak(); - void EatDocumentStart(); - void EatDocumentEnd(); + void Eat(int n = 1); + std::string Peek(int n); - bool IsWhitespaceToBeEaten(); - bool IsLineBreak(); - bool IsBlank(); + void EatLineBreak(); + + bool IsWhitespaceToBeEaten(char ch); + bool IsLineBreak(char ch); + bool IsBlank(char ch); bool IsDocumentStart(); bool IsDocumentEnd(); bool IsBlockEntry(); diff --git a/token.h b/token.h index aab0af2e65..da3dfff01c 100644 --- a/token.h +++ b/token.h @@ -2,7 +2,7 @@ namespace YAML { - class Token {}; + class Token { public: virtual ~Token() {} }; class StreamStartToken: public Token {}; class StreamEndToken: public Token {};