Started the scanner.

This commit is contained in:
Jesse Beder 2008-06-26 09:05:28 +00:00
parent bcbca461de
commit 8ae7b48188
5 changed files with 358 additions and 4 deletions

View file

@ -1,6 +1,7 @@
#include "document.h"
#include "node.h"
#include "parser.h"
#include "scanner.h"
#include <fstream>
namespace YAML
@ -30,10 +31,11 @@ namespace YAML
Clear();
std::ifstream fin(fileName.c_str());
Parser parser(fin);
if(!parser)
return;
Scanner scanner(fin);
scanner.Scan();
// if(!scanner)
// return;
m_pRoot = parser.ReadNextNode();
// m_pRoot = parser.ReadNextNode();
}
}

275
scanner.cpp Normal file
View file

@ -0,0 +1,275 @@
#include "scanner.h"
#include "token.h"
#include <istream>
namespace YAML
{
// Constructor
// . Binds the scanner to the given input stream and resets the scanning state.
// . The initializers are written in the order the members are declared in
//   scanner.h (INPUT, m_column, m_startedStream, m_simpleKeyAllowed,
//   m_flowLevel). That is the order in which they are really initialized,
//   whatever order is written here, so keeping the two in step avoids
//   surprises and reorder warnings.
Scanner::Scanner(std::istream& in)
	: INPUT(in), m_column(0), m_startedStream(false), m_simpleKeyAllowed(false), m_flowLevel(0)
{
}
// Destructor
// . Intentionally empty.
Scanner::~Scanner() {}
///////////////////////////////////////////////////////////////////////
// Misc. helpers
// GetChar
// . Pulls the next character out of the stream and moves our column
//   position one step to the right.
char Scanner::GetChar()
{
	++m_column;
	const char extracted = INPUT.get();
	return extracted;
}
// EatLineBreak
// . Consumes one character (assumed, without checking, to be a line break)
//   and puts us back at column zero.
void Scanner::EatLineBreak()
{
	// a fresh line starts at the left margin
	m_column = 0;

	// drop the line break itself
	INPUT.get();
}
// EatDocumentStart
// . Eats with no checking
void Scanner::EatDocumentStart()
{
INPUT.get();
INPUT.get();
INPUT.get();
}
// EatDocumentEnd
// . Eats with no checking
void Scanner::EatDocumentEnd()
{
INPUT.get();
INPUT.get();
INPUT.get();
}
// IsWhitespaceToBeEaten
// . Looks at (without consuming) the next character and says whether it is
//   whitespace we may skip. We can eat whitespace if:
//   1. It's a space
//   2. It's a tab, and we're either:
//      a. In the flow context
//      b. In the block context but not where a simple key could be allowed
//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
bool Scanner::IsWhitespaceToBeEaten()
{
	char ch = INPUT.peek();

	if(ch == ' ')
		return true;

	// m_flowLevel counts the unclosed '[' and '{' indicators, so "in the
	// flow context" means strictly greater than zero. (Testing ">= 0" holds
	// for every value the counter takes and would accept a tab everywhere,
	// including where a simple key may start.)
	if(ch == '\t' && (m_flowLevel > 0 || !m_simpleKeyAllowed))
		return true;

	return false;
}
// IsLineBreak
// . True when the next character in the stream (which is left in place)
//   is a newline.
bool Scanner::IsLineBreak()
{
	// TODO: More types of line breaks
	return INPUT.peek() == '\n';
}
// IsBlank
// . True when the next character (which is left in the stream) is a line
//   break, a space or a tab, or when there is no next character at all.
bool Scanner::IsBlank()
{
	// Keep the result of peek() in the stream's integer type: squeezing it
	// into a char first makes the end-of-file value indistinguishable from
	// a real 0xFF byte where char is signed, and unmatchable where char is
	// unsigned.
	const std::istream::int_type next = INPUT.peek();
	if(next == std::istream::traits_type::eof())
		return true;

	return IsLineBreak() || next == ' ' || next == '\t';
}
// IsDocumentStart
// . True when we are at column zero and the upcoming input is '---'
//   followed by a blank character (or the end of the input).
// . Only looks ahead: every character read while checking is put back
//   before returning.
bool Scanner::IsDocumentStart()
{
	// a document marker can only begin a line
	if(m_column != 0)
		return false;

	// read up to three dashes, remembering how many we actually took
	int taken = 0;
	while(taken < 3 && INPUT.peek() == '-') {
		INPUT.get();
		++taken;
	}

	// all three dashes must be there, and a blank must follow them
	const bool isMarker = (taken == 3) && IsBlank();

	// whatever the verdict, hand back everything we took
	for(int k=0;k<taken;k++)
		INPUT.putback('-');

	return isMarker;
}
// IsDocumentEnd
// . True when we are at column zero and the upcoming input is '...'
//   followed by a blank character (or the end of the input).
// . Only looks ahead: every character read while checking is put back
//   before returning.
bool Scanner::IsDocumentEnd()
{
	// a document marker can only begin a line
	if(m_column != 0)
		return false;

	// read up to three dots, remembering how many we actually took
	int taken = 0;
	while(taken < 3 && INPUT.peek() == '.') {
		INPUT.get();
		++taken;
	}

	// all three dots must be there, and a blank must follow them
	const bool isMarker = (taken == 3) && IsBlank();

	// Whatever the verdict, hand back exactly what we took. These must be
	// dots: putting back any other character (such as the '-' of the
	// document start marker) would change what the stream contains.
	for(int k=0;k<taken;k++)
		INPUT.putback('.');

	return isMarker;
}
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// StreamStartToken
// . Marks the stream as started, allows a simple key, and seeds the
//   indentation stack with -1. The token is handed back untouched.
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
	m_simpleKeyAllowed = true;
	m_startedStream = true;

	// bottom entry of the indentation stack
	m_indents.push(-1);

	return pToken;
}
// StreamEndToken
// . Behaves as if a newline had just been seen (a positive column is reset
//   to zero) and forbids simple keys. The token is handed back untouched.
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
	// force newline
	m_column = (m_column > 0) ? 0 : m_column;

	// TODO: unroll indentation
	// TODO: "reset simple keys"

	m_simpleKeyAllowed = false;
	return pToken;
}
// DocumentStartToken
// . Forbids simple keys, then consumes the marker from the input.
//   The token is handed back untouched.
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
	// TODO: unroll indentation
	// TODO: reset simple keys

	m_simpleKeyAllowed = false;

	// swallow the marker itself
	EatDocumentStart();

	return pToken;
}
// DocumentEndToken
// . Forbids simple keys, then consumes the marker from the input.
//   The token is handed back untouched.
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{
	// TODO: unroll indentation
	// TODO: reset simple keys

	m_simpleKeyAllowed = false;

	// swallow the marker itself
	EatDocumentEnd();

	return pToken;
}
///////////////////////////////////////////////////////////////////////
// The main scanning function
// . Produces the next token, allocated with new, or 0 when the upcoming
//   input is not one of the tokens handled so far (stream start/end,
//   document start/end).
Token *Scanner::ScanNextToken()
{
	// the very first token is always the stream start
	if(!m_startedStream)
		return ScanToken(new StreamStartToken);

	// skip ahead to something token-like
	ScanToNextToken();

	// TODO: remove "obsolete potential simple keys"
	// TODO: unroll indent

	Token *pResult = 0;
	if(INPUT.peek() == EOF)
		pResult = ScanToken(new StreamEndToken);
	else if(IsDocumentStart())
		pResult = ScanToken(new DocumentStartToken);
	else if(IsDocumentEnd())
		pResult = ScanToken(new DocumentEndToken);

	return pResult;
}
// ScanToNextToken
// . Eats input until we reach the next token-like thing.
void Scanner::ScanToNextToken()
{
while(1) {
// first eat whitespace
while(IsWhitespaceToBeEaten())
INPUT.get();
// then eat a comment
if(INPUT.peek() == Keys::Comment) {
// eat until line break
while(INPUT && !IsLineBreak())
INPUT.get();
}
// if it's NOT a line break, then we're done!
if(!IsLineBreak())
break;
// otherwise, let's eat the line break and keep going
EatLineBreak();
// new line - we may be able to accept a simple key now
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
}
}
// temporary function for testing
void Scanner::Scan()
{
while(Token *pToken = ScanNextToken())
delete pToken;
}
}

55
scanner.h Normal file
View file

@ -0,0 +1,55 @@
#pragma once
#include <ios>
#include <string>
#include <queue>
#include <stack>
namespace YAML
{
class Token;
// Characters the scanner gives a special meaning to.
namespace Keys
{
// Introduces a comment; ScanToNextToken skips from this character up to
// the next line break.
const char Comment = '#';
}
// Scanner
// . Reads characters from an input stream and turns them into tokens.
// . Holds only a reference to the stream; it neither opens nor closes it.
class Scanner
{
public:
// binds the scanner to the stream it will read from
Scanner(std::istream& in);
~Scanner();
// returns the next token, allocated with new, or 0 if the upcoming input
// is not a token the scanner recognizes
Token *ScanNextToken();
// skips whitespace, comments and line breaks up to the next token-like thing
void ScanToNextToken();
// temporary testing aid: scans tokens and discards them
void Scan();
private:
// consuming input
char GetChar();
void EatLineBreak();
void EatDocumentStart();
void EatDocumentEnd();
// looking at the upcoming input without consuming it
bool IsWhitespaceToBeEaten();
bool IsLineBreak();
bool IsBlank();
bool IsDocumentStart();
bool IsDocumentEnd();
// per-token scanning; specialized in scanner.cpp for each token type,
// updates the scanner state for that token and returns pToken
template <typename T> T *ScanToken(T *pToken);
private:
// NOTE: members are initialized in the order they are declared here
// the stream
std::istream& INPUT;
// position on the current line: advanced by GetChar, reset to 0 by EatLineBreak
int m_column;
// the output (tokens)
std::queue <Token *> m_tokens;
// state info
// set once the stream-start token has been produced
bool m_startedStream;
// whether a simple key may start at the current position
bool m_simpleKeyAllowed;
int m_flowLevel; // number of unclosed '[' and '{' indicators
// indentation levels; the stream-start token pushes -1 as the first entry
std::stack <int> m_indents;
};
}

10
token.h Normal file
View file

@ -0,0 +1,10 @@
#pragma once
namespace YAML
{
	// Token
	// . Common base of every token the scanner produces.
	// . Tokens are created as one of the derived types but handed around,
	//   and deleted, through a Token pointer. Deleting a derived object
	//   through a base pointer is only well defined when the base has a
	//   virtual destructor, hence the one declared here.
	class Token {
	public:
		virtual ~Token() {}
	};

	// produced once, before anything has been read from the stream
	class StreamStartToken: public Token {};

	// produced when the end of the input has been reached
	class StreamEndToken: public Token {};

	// produced for a '---' marker at the start of a line
	class DocumentStartToken: public Token {};

	// produced for a '...' marker at the start of a line
	class DocumentEndToken: public Token {};
}

View file

@ -189,6 +189,10 @@
RelativePath=".\scalar.cpp"
>
</File>
<File
RelativePath=".\scanner.cpp"
>
</File>
<File
RelativePath=".\sequence.cpp"
>
@ -223,10 +227,18 @@
RelativePath=".\scalar.h"
>
</File>
<File
RelativePath=".\scanner.h"
>
</File>
<File
RelativePath=".\sequence.h"
>
</File>
<File
RelativePath=".\token.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"