diff --git a/document.cpp b/document.cpp index ca07e66212..cb5c7cf06c 100644 --- a/document.cpp +++ b/document.cpp @@ -5,6 +5,9 @@ #include "exceptions.h" #include +#include +#include "token.h" + namespace YAML { Document::Document(): m_pRoot(0) @@ -34,10 +37,16 @@ namespace YAML std::ifstream fin(fileName.c_str()); Scanner scanner(fin); - scanner.Scan(); + // scan and output, for now + while(1) { + Token *pToken = scanner.GetNextToken(); + if(!pToken) + break; + + std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl; + delete pToken; + } getchar(); -// if(!scanner) -// return; // m_pRoot = parser.ReadNextNode(); } diff --git a/exceptions.h b/exceptions.h index 3c66d85ac8..9c5df62687 100644 --- a/exceptions.h +++ b/exceptions.h @@ -5,33 +5,34 @@ namespace YAML { class Exception: public std::exception {}; + class ScannerException: public Exception {}; - class UnknownToken: public Exception {}; - class IllegalBlockEntry: public Exception {}; - class IllegalMapKey: public Exception {}; - class IllegalMapValue: public Exception {}; - class IllegalScalar: public Exception {}; - class IllegalTabInIndentation: public Exception {}; - class IllegalFlowEnd: public Exception {}; - class IllegalDocIndicator: public Exception {}; - class IllegalEOF: public Exception {}; - class RequiredSimpleKeyNotFound: public Exception {}; - class ZeroIndentationInBlockScalar: public Exception {}; - class UnexpectedCharacterInBlockScalar: public Exception {}; - class AnchorNotFound: public Exception {}; - class IllegalCharacterInAnchor: public Exception {}; + class UnknownToken: public ScannerException {}; + class IllegalBlockEntry: public ScannerException {}; + class IllegalMapKey: public ScannerException {}; + class IllegalMapValue: public ScannerException {}; + class IllegalScalar: public ScannerException {}; + class IllegalTabInIndentation: public ScannerException {}; + class IllegalFlowEnd: public ScannerException {}; + class IllegalDocIndicator: public ScannerException {}; + class IllegalEOF: public ScannerException {}; + class RequiredSimpleKeyNotFound: public ScannerException {}; + class ZeroIndentationInBlockScalar: public ScannerException {}; + class UnexpectedCharacterInBlockScalar: public ScannerException {}; + class AnchorNotFound: public ScannerException {}; + class IllegalCharacterInAnchor: public ScannerException {}; - class UnknownEscapeSequence: public Exception { + class UnknownEscapeSequence: public ScannerException { public: UnknownEscapeSequence(char ch_): ch(ch_) {} char ch; }; - class NonHexNumber: public Exception { + class NonHexNumber: public ScannerException { public: NonHexNumber(char ch_): ch(ch_) {} char ch; }; - class InvalidUnicode: public Exception { + class InvalidUnicode: public ScannerException { public: InvalidUnicode(unsigned value_): value(value_) {} unsigned value; diff --git a/scanner.cpp b/scanner.cpp index fb49bbd665..c3e82ec1e4 100644 --- a/scanner.cpp +++ b/scanner.cpp @@ -2,7 +2,6 @@ #include "token.h" #include "exceptions.h" #include "exp.h" -#include namespace YAML { @@ -23,39 +22,51 @@ namespace YAML delete *it; } - /////////////////////////////////////////////////////////////////////// - // Misc. helpers - - // IsWhitespaceToBeEaten - // . We can eat whitespace if: - // 1. It's a space - // 2. It's a tab, and we're either: - // a. In the flow context - // b. In the block context but not where a simple key could be allowed - // (i.e., not at the beginning of a line, or following '-', '?', or ':') - bool Scanner::IsWhitespaceToBeEaten(char ch) + // GetNextToken + // . Removes and returns the next token on the queue. + Token *Scanner::GetNextToken() { - if(ch == ' ') - return true; - - if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed)) - return true; - - return false; + Token *pToken = PeekNextToken(); + if(!m_tokens.empty()) + m_tokens.pop(); + return pToken; } - // ScanAndEnqueue - // . Scans the token, then pushes it in the queue. - // . Note: we also use a set of "limbo tokens", i.e., tokens - // that haven't yet been pushed. This way, if ScanToken() - // throws an exception, we'll be keeping track of 'pToken' - // somewhere, and it will be automatically cleaned up when - // the Scanner destructs. - template void Scanner::ScanAndEnqueue(T *pToken) + // PeekNextToken + // . Returns (but does not remove) the next token on the queue, and scans if only we need to. + Token *Scanner::PeekNextToken() { - m_limboTokens.insert(pToken); - m_tokens.push(ScanToken(pToken)); - m_limboTokens.erase(pToken); + while(1) { + Token *pToken = 0; + + // is there a token in the queue? + if(!m_tokens.empty()) + pToken = m_tokens.front(); + + // (here's where we clean up the impossible tokens) + if(pToken && pToken->status == TS_INVALID) { + m_tokens.pop(); + delete pToken; + continue; + } + + // on unverified tokens, we just have to wait + if(pToken && pToken->status == TS_UNVERIFIED) + pToken = 0; + + // then that's what we want + if(pToken) + return pToken; + + // no token? maybe we've actually finished + if(m_endedStream) + break; + + // no? then scan... + ScanNextToken(); + } + + return 0; } // ScanNextToken @@ -166,7 +177,8 @@ namespace YAML break; // otherwise, let's eat the line break and keep going - INPUT.EatLineBreak(); + int n = Exp::Break.Match(INPUT); + INPUT.Eat(n); // oh yeah, and let's get rid of that simple key VerifySimpleKey(); @@ -177,6 +189,41 @@ namespace YAML } } + /////////////////////////////////////////////////////////////////////// + // Misc. helpers + + // IsWhitespaceToBeEaten + // . We can eat whitespace if: + // 1. It's a space + // 2. It's a tab, and we're either: + // a. In the flow context + // b. In the block context but not where a simple key could be allowed + // (i.e., not at the beginning of a line, or following '-', '?', or ':') + bool Scanner::IsWhitespaceToBeEaten(char ch) + { + if(ch == ' ') + return true; + + if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed)) + return true; + + return false; + } + + // ScanAndEnqueue + // . Scans the token, then pushes it in the queue. + // . Note: we also use a set of "limbo tokens", i.e., tokens + // that haven't yet been pushed. This way, if ScanToken() + // throws an exception, we'll be keeping track of 'pToken' + // somewhere, and it will be automatically cleaned up when + // the Scanner destructs. + template void Scanner::ScanAndEnqueue(T *pToken) + { + m_limboTokens.insert(pToken); + m_tokens.push(ScanToken(pToken)); + m_limboTokens.erase(pToken); + } + // PushIndentTo // . Pushes an indentation onto the stack, and enqueues the // proper token (sequence start or mapping start). @@ -216,56 +263,4 @@ namespace YAML m_tokens.push(new BlockEndToken); } } - - // GetNextToken - // . Returns the next token on the queue, and scans if only we need to. - Token *Scanner::GetNextToken() - { - while(1) { - Token *pToken = 0; - - // is there a token in the queue? - if(!m_tokens.empty()) - pToken = m_tokens.front(); - - // (here's where we clean up the impossible tokens) - if(pToken && pToken->status == TS_INVALID) { - m_tokens.pop(); - delete pToken; - continue; - } - - // on unverified tokens, we just have to wait - if(pToken && pToken->status == TS_UNVERIFIED) - pToken = 0; - - // then that's what we want - if(pToken) { - m_tokens.pop(); - return pToken; - } - - // no token? maybe we've actually finished - if(m_endedStream) - break; - - // no? then scan... - ScanNextToken(); - } - - return 0; - } - - // temporary function for testing - void Scanner::Scan() - { - while(1) { - Token *pToken = GetNextToken(); - if(!pToken) - break; - - std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl; - delete pToken; - } - } } diff --git a/scanner.h b/scanner.h index cf402cabfd..8f711cb16a 100644 --- a/scanner.h +++ b/scanner.h @@ -19,7 +19,7 @@ namespace YAML ~Scanner(); Token *GetNextToken(); - void Scan(); + Token *PeekNextToken(); private: // scanning diff --git a/stream.cpp b/stream.cpp index b4a5050e13..e7e3010eea 100644 --- a/stream.cpp +++ b/stream.cpp @@ -32,12 +32,4 @@ namespace YAML for(int i=0;i