diff --git a/document.cpp b/document.cpp index b55fb1e2c1..8ba707d34c 100644 --- a/document.cpp +++ b/document.cpp @@ -2,6 +2,7 @@ #include "node.h" #include "parser.h" #include "scanner.h" +#include "exceptions.h" #include namespace YAML @@ -32,7 +33,11 @@ namespace YAML std::ifstream fin(fileName.c_str()); Scanner scanner(fin); - scanner.Scan(); + + try { + scanner.Scan(); + } catch(const UnknownToken& e) { + } // if(!scanner) // return; diff --git a/exceptions.h b/exceptions.h new file mode 100644 index 0000000000..947b924e8d --- /dev/null +++ b/exceptions.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace YAML +{ + class Exception: public std::exception {}; + + class UnknownToken: public Exception {}; + class IllegalBlockEntry: public Exception {}; + class IllegalMapKey: public Exception {}; + class IllegalMapValue: public Exception {}; + class IllegalScalar: public Exception {}; +} diff --git a/scanner.cpp b/scanner.cpp index f28ba67eaa..5320b29178 100644 --- a/scanner.cpp +++ b/scanner.cpp @@ -1,5 +1,6 @@ #include "scanner.h" #include "token.h" +#include "exceptions.h" namespace YAML { @@ -10,6 +11,14 @@ namespace YAML Scanner::~Scanner() { + while(!m_tokens.empty()) { + delete m_tokens.front(); + m_tokens.pop(); + } + + // delete limbo tokens (they're here for RAII) + for(std::set ::const_iterator it=m_limboTokens.begin();it!=m_limboTokens.end();++it) + delete *it; } /////////////////////////////////////////////////////////////////////// @@ -157,9 +166,118 @@ namespace YAML return true; } + // IsBlockEntry + bool Scanner::IsBlockEntry() + { + if(INPUT.peek() != Keys::BlockEntry) + return false; + + INPUT.get(); + + // then needs a blank character (or eof) + if(!IsBlank()) { + INPUT.putback(Keys::BlockEntry); + return false; + } + + INPUT.putback(Keys::BlockEntry); + return true; + } + + // IsKey + bool Scanner::IsKey() + { + if(INPUT.peek() != Keys::Key) + return false; + + INPUT.get(); + + // then needs a blank character (or eof), if we're in block context + if(m_flowLevel == 0 && !IsBlank()) { + INPUT.putback(Keys::BlockEntry); + return false; + } + + INPUT.putback(Keys::BlockEntry); + return true; + } + + // IsValue + bool Scanner::IsValue() + { + if(INPUT.peek() != Keys::Value) + return false; + + INPUT.get(); + + // then needs a blank character (or eof), if we're in block context + if(m_flowLevel == 0 && !IsBlank()) { + INPUT.putback(Keys::BlockEntry); + return false; + } + + INPUT.putback(Keys::BlockEntry); + return true; + } + + // IsPlainScalar + // . Rules: + // . Cannot start with a blank. + // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ ` + // . In the block context - ? : must be not be followed with a space. + // . In the flow context ? : are illegal and - must not be followed with a space. + bool Scanner::IsPlainScalar() + { + if(IsBlank()) + return false; + + // never characters + std::string never = ",[]{}#&*!|>\'\"%@`"; + for(unsigned i=0;i void Scanner::ScanAndEnqueue(T *pToken) + { + m_limboTokens.insert(pToken); + m_tokens.push(ScanToken(pToken)); + m_limboTokens.erase(pToken); + } + // StreamStartToken template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken) { @@ -177,7 +295,7 @@ namespace YAML if(m_column > 0) m_column = 0; - // TODO: unroll indentation + PopIndentTo(-1); // TODO: "reset simple keys" m_simpleKeyAllowed = false; @@ -188,8 +306,8 @@ namespace YAML // DocumentStartToken template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken) { - // TODO: unroll indentation - // TODO: reset simple keys + PopIndentTo(m_column); + // TODO: "reset simple keys" m_simpleKeyAllowed = false; @@ -202,8 +320,8 @@ namespace YAML // DocumentEndToken template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken) { - // TODO: unroll indentation - // TODO: reset simple keys + PopIndentTo(m_column); + // TODO: "reset simple keys" m_simpleKeyAllowed = false; @@ -213,28 +331,255 @@ namespace YAML return pToken; } + // FlowSeqStartToken + template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken) + { + // TODO: "save simple key" + // TODO: increase flow level + + m_simpleKeyAllowed = true; + + // eat it + INPUT.get(); + + return pToken; + } + + // FlowMapStartToken + template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken) + { + // TODO: "save simple key" + // TODO: increase flow level + + m_simpleKeyAllowed = true; + + // eat it + INPUT.get(); + + return pToken; + } + + // FlowSeqEndToken + template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken) + { + // TODO: "remove simple key" + // TODO: decrease flow level + + m_simpleKeyAllowed = false; + + // eat it + INPUT.get(); + + return pToken; + } + + // FlowMapEndToken + template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken) + { + // TODO: "remove simple key" + // TODO: decrease flow level + + m_simpleKeyAllowed = false; + + // eat it + INPUT.get(); + + return pToken; + } + + // FlowEntryToken + template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken) + { + // TODO: "remove simple key" + + m_simpleKeyAllowed = true; + + // eat it + INPUT.get(); + + return pToken; + } + + // BlockEntryToken + template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken) + { + // we better be in the block context! + if(m_flowLevel == 0) { + // can we put it here? + if(!m_simpleKeyAllowed) + throw IllegalBlockEntry(); + + PushIndentTo(m_column, true); // , -1 + } else { + // TODO: throw? + } + + // TODO: "remove simple key" + + m_simpleKeyAllowed = true; + + // eat + INPUT.get(); + return pToken; + } + + // KeyToken + template <> KeyToken *Scanner::ScanToken(KeyToken *pToken) + { + // are we in block context? + if(m_flowLevel == 0) { + if(!m_simpleKeyAllowed) + throw IllegalMapKey(); + + PushIndentTo(m_column, false); + } + + // TODO: "remove simple key" + + // can only put a simple key here if we're in block context + if(m_flowLevel == 0) + m_simpleKeyAllowed = true; + else + m_simpleKeyAllowed = false; + + // eat + INPUT.get(); + return pToken; + } + + // ValueToken + template <> ValueToken *Scanner::ScanToken(ValueToken *pToken) + { + // TODO: Is it a simple key? + if(false) { + } else { + // If not, ... + // are we in block context? + if(m_flowLevel == 0) { + if(!m_simpleKeyAllowed) + throw IllegalMapValue(); + + PushIndentTo(m_column, false); + } + } + + // can only put a simple key here if we're in block context + if(m_flowLevel == 0) + m_simpleKeyAllowed = true; + else + m_simpleKeyAllowed = false; + + // eat + INPUT.get(); + return pToken; + } + + // PlainScalarToken + template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) + { + // TODO: "save simple key" + + m_simpleKeyAllowed = false; + + // now eat and store the scalar + while(1) { + // doc start/end tokens + if(IsDocumentStart() || IsDocumentEnd()) + break; + + // comment + if(INPUT.peek() == Keys::Comment) + break; + + // first eat non-blanks + while(!IsBlank()) { + // illegal colon in flow context + if(m_flowLevel > 0 && INPUT.peek() == ':') { + INPUT.get(); + if(!IsBlank()) { + INPUT.putback(':'); + throw IllegalScalar(); + } + INPUT.putback(':'); + } + + // characters that might end the scalar + // TODO: scanner.c line 3434 + } + } + + return pToken; + } + /////////////////////////////////////////////////////////////////////// // The main scanning function - Token *Scanner::ScanNextToken() + void Scanner::ScanNextToken() { if(!m_startedStream) - return ScanToken(new StreamStartToken); + return ScanAndEnqueue(new StreamStartToken); ScanToNextToken(); // TODO: remove "obsolete potential simple keys" - // TODO: unroll indent + PopIndentTo(m_column); if(INPUT.peek() == EOF) - return ScanToken(new StreamEndToken); + return ScanAndEnqueue(new StreamEndToken); + // are we at a document token? if(IsDocumentStart()) - return ScanToken(new DocumentStartToken); + return ScanAndEnqueue(new DocumentStartToken); if(IsDocumentEnd()) - return ScanToken(new DocumentEndToken); + return ScanAndEnqueue(new DocumentEndToken); - return 0; + // are we at a flow start/end/entry? + if(INPUT.peek() == Keys::FlowSeqStart) + return ScanAndEnqueue(new FlowSeqStartToken); + + if(INPUT.peek() == Keys::FlowSeqEnd) + return ScanAndEnqueue(new FlowSeqEndToken); + + if(INPUT.peek() == Keys::FlowMapStart) + return ScanAndEnqueue(new FlowMapStartToken); + + if(INPUT.peek() == Keys::FlowMapEnd) + return ScanAndEnqueue(new FlowMapEndToken); + + if(INPUT.peek() == Keys::FlowEntry) + return ScanAndEnqueue(new FlowEntryToken); + + // block/map stuff? + if(IsBlockEntry()) + return ScanAndEnqueue(new BlockEntryToken); + + if(IsKey()) + return ScanAndEnqueue(new KeyToken); + + if(IsValue()) + return ScanAndEnqueue(new ValueToken); + + // TODO: alias/anchor/tag + + // TODO: special scalars + if(INPUT.peek() == Keys::LiteralScalar && m_flowLevel == 0) + return; + + if(INPUT.peek() == Keys::FoldedScalar && m_flowLevel == 0) + return; + + if(INPUT.peek() == '\'') + return; + + if(INPUT.peek() == '\"') + return; + + // plain scalars + if(IsPlainScalar()) + return ScanAndEnqueue(new PlainScalarToken); + + // don't know what it is! + throw UnknownToken(); } // ScanToNextToken @@ -266,10 +611,47 @@ namespace YAML } } + // PushIndentTo + // . Pushes an indentation onto the stack, and enqueues the + // proper token (sequence start or mapping start). + void Scanner::PushIndentTo(int column, bool sequence) + { + // are we in flow? + if(m_flowLevel > 0) + return; + + // is this actually an indentation? + if(column <= m_indents.top()) + return; + + // now push + m_indents.push(column); + if(sequence) + m_tokens.push(new BlockSeqStartToken); + else + m_tokens.push(new BlockMapStartToken); + } + + // PopIndentTo + // . Pops indentations off the stack until we reach 'column' indentation, + // and enqueues the proper token each time. + void Scanner::PopIndentTo(int column) + { + // are we in flow? + if(m_flowLevel > 0) + return; + + // now pop away + while(!m_indents.empty() && m_indents.top() > column) { + m_indents.pop(); + m_tokens.push(new BlockEndToken); + } + } + // temporary function for testing void Scanner::Scan() { - while(Token *pToken = ScanNextToken()) - delete pToken; + while(INPUT) + ScanNextToken(); } } diff --git a/scanner.h b/scanner.h index 7fd8f5afef..9d9e282e10 100644 --- a/scanner.h +++ b/scanner.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace YAML { @@ -12,6 +13,19 @@ namespace YAML namespace Keys { const char Comment = '#'; + const char FlowSeqStart = '['; + const char FlowSeqEnd = ']'; + const char FlowMapStart = '{'; + const char FlowMapEnd = '}'; + const char FlowEntry = ','; + const char BlockEntry = '-'; + const char Key = '?'; + const char Value = ':'; + const char Alias = '*'; + const char Anchor = '&'; + const char Tag = '!'; + const char LiteralScalar = '|'; + const char FoldedScalar = '>'; } class Scanner @@ -20,8 +34,10 @@ namespace YAML Scanner(std::istream& in); ~Scanner(); - Token *ScanNextToken(); + void ScanNextToken(); void ScanToNextToken(); + void PushIndentTo(int column, bool sequence); + void PopIndentTo(int column); void Scan(); @@ -36,6 +52,12 @@ namespace YAML bool IsBlank(); bool IsDocumentStart(); bool IsDocumentEnd(); + bool IsBlockEntry(); + bool IsKey(); + bool IsValue(); + bool IsPlainScalar(); + + template void ScanAndEnqueue(T *pToken); template T *ScanToken(T *pToken); private: @@ -45,6 +67,7 @@ namespace YAML // the output (tokens) std::queue m_tokens; + std::set m_limboTokens; // state info bool m_startedStream; diff --git a/token.h b/token.h index 699b93bad6..aab0af2e65 100644 --- a/token.h +++ b/token.h @@ -3,8 +3,25 @@ namespace YAML { class Token {}; + class StreamStartToken: public Token {}; class StreamEndToken: public Token {}; class DocumentStartToken: public Token {}; class DocumentEndToken: public Token {}; + + class BlockSeqStartToken: public Token {}; + class BlockMapStartToken: public Token {}; + class BlockEndToken: public Token {}; + class BlockEntryToken: public Token {}; + + class FlowSeqStartToken: public Token {}; + class FlowMapStartToken: public Token {}; + class FlowSeqEndToken: public Token {}; + class FlowMapEndToken: public Token {}; + class FlowEntryToken: public Token {}; + + class KeyToken: public Token {}; + class ValueToken: public Token {}; + + class PlainScalarToken: public Token {}; } diff --git a/yaml-reader.vcproj b/yaml-reader.vcproj index 88eb74d39c..04a440f9ab 100644 --- a/yaml-reader.vcproj +++ b/yaml-reader.vcproj @@ -211,6 +211,10 @@ RelativePath=".\document.h" > + +