diff --git a/document.cpp b/document.cpp index 953ac6d825..b55fb1e2c1 100644 --- a/document.cpp +++ b/document.cpp @@ -1,6 +1,7 @@ #include "document.h" #include "node.h" #include "parser.h" +#include "scanner.h" #include namespace YAML @@ -30,10 +31,11 @@ namespace YAML Clear(); std::ifstream fin(fileName.c_str()); - Parser parser(fin); - if(!parser) - return; + Scanner scanner(fin); + scanner.Scan(); +// if(!scanner) +// return; - m_pRoot = parser.ReadNextNode(); +// m_pRoot = parser.ReadNextNode(); } } diff --git a/scanner.cpp b/scanner.cpp new file mode 100644 index 0000000000..f28ba67eaa --- /dev/null +++ b/scanner.cpp @@ -0,0 +1,275 @@ +#include "scanner.h" +#include "token.h" + +namespace YAML +{ + Scanner::Scanner(std::istream& in) + : INPUT(in), m_startedStream(false), m_simpleKeyAllowed(false), m_flowLevel(0), m_column(0) + { + } + + Scanner::~Scanner() + { + } + + /////////////////////////////////////////////////////////////////////// + // Misc. helpers + + // GetChar + // . Extracts one character from the stream and advances the column by one + char Scanner::GetChar() + { + m_column++; + return INPUT.get(); + } + + // EatLineBreak + // . Eats one character with no checking and resets the column to zero + void Scanner::EatLineBreak() + { + m_column = 0; + INPUT.get(); + } + + // EatDocumentStart + // . Eats three characters with no checking; the column is left unchanged + void Scanner::EatDocumentStart() + { + INPUT.get(); + INPUT.get(); + INPUT.get(); + } + + // EatDocumentEnd + // . Eats three characters with no checking; the column is left unchanged + void Scanner::EatDocumentEnd() + { + INPUT.get(); + INPUT.get(); + INPUT.get(); + } + + // IsWhitespaceToBeEaten + // . We can eat whitespace if: + // 1. It's a space + // 2. It's a tab, and we're either: + // a. In the flow context + // b. 
In the block context but not where a simple key could be allowed + // (i.e., not at the beginning of a line, or following '-', '?', or ':'). NOTE(review): the test below uses m_flowLevel >= 0, which is also true at flow level 0; confirm whether > 0 was intended for case 2a + bool Scanner::IsWhitespaceToBeEaten() + { + char ch = INPUT.peek(); + + if(ch == ' ') + return true; + + if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed)) + return true; + + return false; + } + + // IsLineBreak: true when the next character in the stream is '\n' + bool Scanner::IsLineBreak() + { + char ch = INPUT.peek(); + return ch == '\n'; // TODO: More types of line breaks + } + + // IsBlank: true when the next character is a line break, a space, a tab or EOF. NOTE(review): peek() is narrowed to char before the EOF comparison; confirm this holds where char is unsigned + bool Scanner::IsBlank() + { + char ch = INPUT.peek(); + return IsLineBreak() || ch == ' ' || ch == '\t' || ch == EOF; + } + + // IsDocumentStart + bool Scanner::IsDocumentStart() + { + // needs to be at the start of a new line + if(m_column != 0) + return false; + + // then needs '---' + for(int i=0;i<3;i++) { + if(INPUT.peek() != '-') { + // first put 'em back + for(int j=0;j StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken) + { + m_startedStream = true; + m_simpleKeyAllowed = true; + m_indents.push(-1); + + return pToken; + } + + // StreamEndToken + template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken) + { + // force newline + if(m_column > 0) + m_column = 0; + + // TODO: unroll indentation + // TODO: "reset simple keys" + + m_simpleKeyAllowed = false; + + return pToken; + } + + // DocumentStartToken + template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken) + { + // TODO: unroll indentation + // TODO: reset simple keys + + m_simpleKeyAllowed = false; + + // eat it + EatDocumentStart(); + + return pToken; + } + + // DocumentEndToken + template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken) + { + // TODO: unroll indentation + // TODO: reset simple keys + + m_simpleKeyAllowed = false; + + // eat it + EatDocumentEnd(); + + return pToken; + } + + /////////////////////////////////////////////////////////////////////// + // The main scanning function: returns a newly allocated token, or 0 when none of its checks match + + Token *Scanner::ScanNextToken() + { + 
if(!m_startedStream) + return ScanToken(new StreamStartToken); + + ScanToNextToken(); + // TODO: remove "obsolete potential simple keys" + // TODO: unroll indent + + if(INPUT.peek() == EOF) + return ScanToken(new StreamEndToken); + + if(IsDocumentStart()) + return ScanToken(new DocumentStartToken); + + if(IsDocumentEnd()) + return ScanToken(new DocumentEndToken); + + return 0; + } + + // ScanToNextToken + // . Eats whitespace, comments and line breaks until we reach the next token-like thing. + void Scanner::ScanToNextToken() + { + while(1) { + // first eat whitespace (read with INPUT.get() directly, so m_column is not advanced) + while(IsWhitespaceToBeEaten()) + INPUT.get(); + + // then eat a comment + if(INPUT.peek() == Keys::Comment) { + // eat until line break + while(INPUT && !IsLineBreak()) + INPUT.get(); + } + + // if it's NOT a line break, then we're done! + if(!IsLineBreak()) + break; + + // otherwise, let's eat the line break and keep going + EatLineBreak(); + + // new line - we may be able to accept a simple key now + if(m_flowLevel == 0) + m_simpleKeyAllowed = true; + } + } + + // temporary function for testing: scans and deletes tokens until ScanNextToken returns 0. NOTE(review): ScanNextToken returns a non-null StreamEndToken every time peek() is EOF, so this loop does not appear to terminate at end of input; confirm. Tokens are also deleted through Token*, and Token (token.h) declares no virtual destructor + void Scanner::Scan() + { + while(Token *pToken = ScanNextToken()) + delete pToken; + } +} diff --git a/scanner.h b/scanner.h new file mode 100644 index 0000000000..7fd8f5afef --- /dev/null +++ b/scanner.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include +#include + +namespace YAML +{ + class Token; + + namespace Keys + { + const char Comment = '#'; + } + + class Scanner + { + public: + Scanner(std::istream& in); + ~Scanner(); + + Token *ScanNextToken(); + void ScanToNextToken(); + + void Scan(); + + private: + char GetChar(); + void EatLineBreak(); + void EatDocumentStart(); + void EatDocumentEnd(); + + bool IsWhitespaceToBeEaten(); + bool IsLineBreak(); + bool IsBlank(); + bool IsDocumentStart(); + bool IsDocumentEnd(); + template T *ScanToken(T *pToken); + + private: + // the stream + std::istream& INPUT; + int m_column; + + // the output (tokens) + std::queue m_tokens; + + // state info + bool m_startedStream; + bool 
m_simpleKeyAllowed; + int m_flowLevel; // number of unclosed '[' and '{' indicators + std::stack m_indents; + }; +} diff --git a/token.h b/token.h new file mode 100644 index 0000000000..699b93bad6 --- /dev/null +++ b/token.h @@ -0,0 +1,10 @@ +#pragma once + +namespace YAML +{ + class Token {}; + class StreamStartToken: public Token {}; + class StreamEndToken: public Token {}; + class DocumentStartToken: public Token {}; + class DocumentEndToken: public Token {}; +} diff --git a/yaml-reader.vcproj b/yaml-reader.vcproj index a235c5c670..88eb74d39c 100644 --- a/yaml-reader.vcproj +++ b/yaml-reader.vcproj @@ -189,6 +189,10 @@ RelativePath=".\scalar.cpp" > + + @@ -223,10 +227,18 @@ RelativePath=".\scalar.h" > + + + +