Continued working on scanner.

We're now using exceptions for errors, and scanning/pushing tokens is exception-safe (using a set of "limbo tokens").
This commit is contained in:
Jesse Beder 2008-06-26 19:30:11 +00:00
parent 8ae7b48188
commit a3961d047f
6 changed files with 461 additions and 16 deletions

View file

@ -2,6 +2,7 @@
#include "node.h" #include "node.h"
#include "parser.h" #include "parser.h"
#include "scanner.h" #include "scanner.h"
#include "exceptions.h"
#include <fstream> #include <fstream>
namespace YAML namespace YAML
@ -32,7 +33,11 @@ namespace YAML
std::ifstream fin(fileName.c_str()); std::ifstream fin(fileName.c_str());
Scanner scanner(fin); Scanner scanner(fin);
scanner.Scan();
try {
scanner.Scan();
} catch(const UnknownToken& e) {
}
// if(!scanner) // if(!scanner)
// return; // return;

14
exceptions.h Normal file
View file

@ -0,0 +1,14 @@
#pragma once
#include <exception>
namespace YAML
{
	// Base class of every error thrown by the YAML scanner/parser.
	// Catch this type to handle any YAML failure; catch std::exception
	// to handle it together with standard library errors.
	class Exception: public std::exception {
	public:
		// Short, static description of the failure (never null).
		virtual const char *what() const throw() { return "yaml: error"; }
	};

	// Thrown by the scanner when the input does not start any known token.
	class UnknownToken: public Exception {
	public:
		virtual const char *what() const throw() { return "yaml: unknown token"; }
	};

	// Thrown when a block entry is found where one is not allowed.
	class IllegalBlockEntry: public Exception {
	public:
		virtual const char *what() const throw() { return "yaml: illegal block entry"; }
	};

	// Thrown when a map key is found where one is not allowed.
	class IllegalMapKey: public Exception {
	public:
		virtual const char *what() const throw() { return "yaml: illegal map key"; }
	};

	// Thrown when a map value is found where one is not allowed.
	class IllegalMapValue: public Exception {
	public:
		virtual const char *what() const throw() { return "yaml: illegal map value"; }
	};

	// Thrown when a scalar contains a character that is illegal in its context.
	class IllegalScalar: public Exception {
	public:
		virtual const char *what() const throw() { return "yaml: illegal scalar"; }
	};
}

View file

@ -1,5 +1,6 @@
#include "scanner.h" #include "scanner.h"
#include "token.h" #include "token.h"
#include "exceptions.h"
namespace YAML namespace YAML
{ {
@ -10,6 +11,14 @@ namespace YAML
Scanner::~Scanner() Scanner::~Scanner()
{ {
// the scanner owns every token still sitting in the output queue,
// so delete each one before discarding its pointer
while(!m_tokens.empty()) {
delete m_tokens.front();
m_tokens.pop();
}
// delete limbo tokens: these were registered by ScanAndEnqueue() but never
// reached the queue (ScanAndEnqueue() erases a token from this set only
// after it has been pushed), so this set is the only place still tracking them
for(std::set <Token *>::const_iterator it=m_limboTokens.begin();it!=m_limboTokens.end();++it)
delete *it;
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
@ -157,9 +166,118 @@ namespace YAML
return true; return true;
} }
// IsBlockEntry
// . Tells whether the input is positioned on a block entry indicator
//   that is directly followed by a blank (as judged by IsBlank()).
// . The indicator is read and then put back, so nothing is consumed.
bool Scanner::IsBlockEntry()
{
	const bool atIndicator = (INPUT.peek() == Keys::BlockEntry);
	if(!atIndicator)
		return false;

	// step over the indicator so IsBlank() looks at the character after it,
	// then restore the indicator whatever the outcome was
	INPUT.get();
	const bool followedByBlank = IsBlank();
	INPUT.putback(Keys::BlockEntry);

	return followedByBlank;
}
// IsKey
// . Returns true if the input is positioned on a key indicator (Keys::Key).
// . In block context (m_flowLevel == 0) the indicator must additionally be
//   followed by a blank, as judged by IsBlank().
// . The indicator is read and then put back, so nothing is consumed.
bool Scanner::IsKey()
{
	if(INPUT.peek() != Keys::Key)
		return false;

	// step over the indicator to look at what follows it; the short-circuit
	// keeps IsBlank() from being consulted at all in flow context
	INPUT.get();
	const bool rejected = (m_flowLevel == 0 && !IsBlank());

	// restore exactly the character that was extracted: putting back a
	// different one either fails or rewrites the stream contents
	INPUT.putback(Keys::Key);

	return !rejected;
}
// IsValue
// . Returns true if the input is positioned on a value indicator (Keys::Value).
// . In block context (m_flowLevel == 0) the indicator must additionally be
//   followed by a blank, as judged by IsBlank().
// . The indicator is read and then put back, so nothing is consumed.
bool Scanner::IsValue()
{
	if(INPUT.peek() != Keys::Value)
		return false;

	// step over the indicator to look at what follows it; the short-circuit
	// keeps IsBlank() from being consulted at all in flow context
	INPUT.get();
	const bool rejected = (m_flowLevel == 0 && !IsBlank());

	// restore exactly the character that was extracted: putting back a
	// different one either fails or rewrites the stream contents
	INPUT.putback(Keys::Value);

	return !rejected;
}
// IsPlainScalar
// . Returns true if a plain scalar may start at the current input position.
// . Rules:
//   . Cannot start with a blank.
//   . Can never start with any of , [ ] { } # & * ! | > ' " % @ `
//   . In the block context, - ? : must not be followed by a blank.
//   . In the flow context, ? : are illegal and - must not be followed by a blank.
// . This is a pure lookahead: any character read to inspect its successor
//   is always put back, so the scalar's first character is never lost.
bool Scanner::IsPlainScalar()
{
	if(IsBlank())
		return false;

	// characters that can never start a plain scalar
	// (an end-of-file peek maps to a char that is not in this list)
	const std::string never = ",[]{}#&*!|>\'\"%@`";
	const int first = INPUT.peek();
	if(never.find(static_cast<char>(first)) != std::string::npos)
		return false;

	const bool inFlow = (m_flowLevel != 0);
	const bool isKeyOrValue = (first == '?' || first == ':');

	// '?' and ':' are never allowed to start a plain scalar in flow context
	if(inFlow && isKeyOrValue)
		return false;

	// indicators that are only acceptable when NOT followed by a blank:
	// '-' in either context, '?' and ':' in block context
	if(first == '-' || isKeyOrValue) {
		INPUT.get();
		const bool followedByBlank = IsBlank();
		// restore the indicator on both outcomes
		INPUT.putback(static_cast<char>(first));
		if(followedByBlank)
			return false;
	}

	return true;
}
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens // Specialization for scanning specific tokens
// ScanAndEnqueue
// . Scans the token, then pushes it in the queue.
// . Takes ownership of 'pToken' in every case, including when it throws.
// . Note: we also use a set of "limbo tokens", i.e., tokens
//   that haven't yet been pushed. This way, if ScanToken()
//   throws an exception, we'll be keeping track of 'pToken'
//   somewhere, and it will be cleaned up when the Scanner destructs.
template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
{
	// if registering the token itself fails, nothing tracks it yet,
	// so it has to be released here before the exception propagates
	try {
		m_limboTokens.insert(pToken);
	} catch(...) {
		delete pToken;
		throw;
	}

	// from here on a throw leaves the token in the limbo set
	m_tokens.push(ScanToken(pToken));

	// the queue owns it now
	m_limboTokens.erase(pToken);
}
// StreamStartToken // StreamStartToken
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken) template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{ {
@ -177,7 +295,7 @@ namespace YAML
if(m_column > 0) if(m_column > 0)
m_column = 0; m_column = 0;
// TODO: unroll indentation PopIndentTo(-1);
// TODO: "reset simple keys" // TODO: "reset simple keys"
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
@ -188,8 +306,8 @@ namespace YAML
// DocumentStartToken // DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken) template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{ {
// TODO: unroll indentation PopIndentTo(m_column);
// TODO: reset simple keys // TODO: "reset simple keys"
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
@ -202,8 +320,8 @@ namespace YAML
// DocumentEndToken // DocumentEndToken
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken) template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{ {
// TODO: unroll indentation PopIndentTo(m_column);
// TODO: reset simple keys // TODO: "reset simple keys"
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
@ -213,28 +331,255 @@ namespace YAML
return pToken; return pToken;
} }
// FlowSeqStartToken
// . Marks a simple key as allowed, then consumes one character from the
//   input (ScanNextToken() calls this when it peeks Keys::FlowSeqStart).
// . Returns the token it was given.
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{
	// TODO: "save simple key"
	// TODO: increase flow level

	m_simpleKeyAllowed = true;

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// FlowMapStartToken
// . Marks a simple key as allowed, then consumes one character from the
//   input (ScanNextToken() calls this when it peeks Keys::FlowMapStart).
// . Returns the token it was given.
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
	// TODO: "save simple key"
	// TODO: increase flow level

	m_simpleKeyAllowed = true;

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// FlowSeqEndToken
// . Marks a simple key as not allowed, then consumes one character from the
//   input (ScanNextToken() calls this when it peeks Keys::FlowSeqEnd).
// . Returns the token it was given.
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{
	// TODO: "remove simple key"
	// TODO: decrease flow level

	m_simpleKeyAllowed = false;

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// FlowMapEndToken
// . Marks a simple key as not allowed, then consumes one character from the
//   input (ScanNextToken() calls this when it peeks Keys::FlowMapEnd).
// . Returns the token it was given.
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
	// TODO: "remove simple key"
	// TODO: decrease flow level

	m_simpleKeyAllowed = false;

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// FlowEntryToken
// . Marks a simple key as allowed, then consumes one character from the
//   input (ScanNextToken() calls this when it peeks Keys::FlowEntry).
// . Returns the token it was given.
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
{
	// TODO: "remove simple key"

	m_simpleKeyAllowed = true;

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// BlockEntryToken
// . In block context: throws IllegalBlockEntry unless a simple key is
//   currently allowed, then pushes a sequence indentation at m_column.
// . In flow context nothing is checked yet (see TODO).
// . Afterwards a simple key is allowed and one input character is consumed.
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
{
	const bool inBlockContext = (m_flowLevel == 0);
	if(inBlockContext) {
		// an entry is only legal where a simple key could start
		if(!m_simpleKeyAllowed)
			throw IllegalBlockEntry();

		PushIndentTo(m_column, true);
	}
	// TODO: should a block entry in flow context throw?

	// TODO: "remove simple key"

	m_simpleKeyAllowed = true;

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// KeyToken
// . In block context: throws IllegalMapKey unless a simple key is
//   currently allowed, then pushes a mapping indentation at m_column.
// . Afterwards a simple key is allowed only in block context, and one
//   input character is consumed.
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
	if(m_flowLevel == 0) {
		// a key is only legal where a simple key could start
		if(!m_simpleKeyAllowed)
			throw IllegalMapKey();

		PushIndentTo(m_column, false);
	}

	// TODO: "remove simple key"

	// a simple key may follow only when we're in block context
	m_simpleKeyAllowed = (m_flowLevel == 0);

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// ValueToken
// . In block context: throws IllegalMapValue unless a simple key is
//   currently allowed, then pushes a mapping indentation at m_column.
// . Afterwards a simple key is allowed only in block context, and one
//   input character is consumed.
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
{
	// TODO: Is it a simple key? That case is not handled yet, so every
	//       value currently takes the "not a simple key" path below.
	if(m_flowLevel == 0) {
		if(!m_simpleKeyAllowed)
			throw IllegalMapValue();

		PushIndentTo(m_column, false);
	}

	// a simple key may follow only when we're in block context
	m_simpleKeyAllowed = (m_flowLevel == 0);

	// consume the indicator character
	INPUT.get();

	return pToken;
}
// PlainScalarToken
// . Consumes the first run of non-blank characters of a plain scalar.
// . Scanning stops, without consuming the terminator, at:
//   . a document start/end marker or a comment,
//   . a blank (as judged by IsBlank()) or end of input,
//   . a ':' that is followed by a blank (the value indicator),
//   . in flow context, any of , ? [ ] { }
// . Throws IllegalScalar on a ':' in flow context that is not followed by
//   a blank; the ':' is put back before throwing.
// . Every iteration either consumes a character or leaves the loop, so the
//   scan always terminates.
// . TODO: store the scanned text, and continue across blanks/line breaks
//         (multi-word scalars); see libyaml's plain scalar scanner.
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
	// TODO: "save simple key"

	m_simpleKeyAllowed = false;

	// doc start/end tokens and comments are never part of the scalar
	if(IsDocumentStart() || IsDocumentEnd())
		return pToken;
	if(INPUT.peek() == Keys::Comment)
		return pToken;

	// eat non-blanks (the explicit EOF test guards against spinning at end of input)
	while(INPUT.peek() != EOF && !IsBlank()) {
		const int ch = INPUT.peek();

		if(ch == ':') {
			// look one character past the colon, then restore it
			INPUT.get();
			const bool followedByBlank = IsBlank();
			INPUT.putback(':');

			// "key: value" - the colon belongs to the value token
			if(followedByBlank)
				break;

			// illegal colon in flow context
			if(m_flowLevel > 0)
				throw IllegalScalar();
		}

		// flow indicators end the scalar when we're inside a flow collection
		if(m_flowLevel > 0 && (ch == ',' || ch == '?' || ch == '[' || ch == ']' || ch == '{' || ch == '}'))
			break;

		// this character is part of the scalar
		INPUT.get();
	}

	return pToken;
}
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// The main scanning function // The main scanning function
Token *Scanner::ScanNextToken() void Scanner::ScanNextToken()
{ {
if(!m_startedStream) if(!m_startedStream)
return ScanToken(new StreamStartToken); return ScanAndEnqueue(new StreamStartToken);
ScanToNextToken(); ScanToNextToken();
// TODO: remove "obsolete potential simple keys" // TODO: remove "obsolete potential simple keys"
// TODO: unroll indent PopIndentTo(m_column);
if(INPUT.peek() == EOF) if(INPUT.peek() == EOF)
return ScanToken(new StreamEndToken); return ScanAndEnqueue(new StreamEndToken);
// are we at a document token?
if(IsDocumentStart()) if(IsDocumentStart())
return ScanToken(new DocumentStartToken); return ScanAndEnqueue(new DocumentStartToken);
if(IsDocumentEnd()) if(IsDocumentEnd())
return ScanToken(new DocumentEndToken); return ScanAndEnqueue(new DocumentEndToken);
return 0; // are we at a flow start/end/entry?
if(INPUT.peek() == Keys::FlowSeqStart)
return ScanAndEnqueue(new FlowSeqStartToken);
if(INPUT.peek() == Keys::FlowSeqEnd)
return ScanAndEnqueue(new FlowSeqEndToken);
if(INPUT.peek() == Keys::FlowMapStart)
return ScanAndEnqueue(new FlowMapStartToken);
if(INPUT.peek() == Keys::FlowMapEnd)
return ScanAndEnqueue(new FlowMapEndToken);
if(INPUT.peek() == Keys::FlowEntry)
return ScanAndEnqueue(new FlowEntryToken);
// block/map stuff?
if(IsBlockEntry())
return ScanAndEnqueue(new BlockEntryToken);
if(IsKey())
return ScanAndEnqueue(new KeyToken);
if(IsValue())
return ScanAndEnqueue(new ValueToken);
// TODO: alias/anchor/tag
// TODO: special scalars
if(INPUT.peek() == Keys::LiteralScalar && m_flowLevel == 0)
return;
if(INPUT.peek() == Keys::FoldedScalar && m_flowLevel == 0)
return;
if(INPUT.peek() == '\'')
return;
if(INPUT.peek() == '\"')
return;
// plain scalars
if(IsPlainScalar())
return ScanAndEnqueue(new PlainScalarToken);
// don't know what it is!
throw UnknownToken();
} }
// ScanToNextToken // ScanToNextToken
@ -266,10 +611,47 @@ namespace YAML
} }
} }
// PushIndentTo
// . Pushes an indentation onto the stack, and enqueues the
//   proper token (sequence start or mapping start).
// . column: the indentation column to push.
// . sequence: true enqueues a BlockSeqStartToken, false a BlockMapStartToken.
// . Does nothing in flow context, or when 'column' is not deeper than the
//   current top of the indentation stack.
void Scanner::PushIndentTo(int column, bool sequence)
{
	// are we in flow?
	if(m_flowLevel > 0)
		return;

	// is this actually an indentation?
	// (PopIndentTo() can leave the stack empty, and top() on an empty stack
	// is undefined - with nothing on the stack any column is a new indentation)
	if(!m_indents.empty() && column <= m_indents.top())
		return;

	// now push
	m_indents.push(column);
	if(sequence)
		m_tokens.push(new BlockSeqStartToken);
	else
		m_tokens.push(new BlockMapStartToken);
}
// PopIndentTo
// . Unwinds the indentation stack: every entry deeper than 'column' is
//   removed, and a BlockEndToken is enqueued for each entry removed.
// . Does nothing in flow context.
void Scanner::PopIndentTo(int column)
{
	// indentation is not tracked inside flow collections
	if(m_flowLevel > 0)
		return;

	for(;;) {
		// stop once the stack is exhausted or the top is no deeper than 'column'
		if(m_indents.empty() || m_indents.top() <= column)
			break;

		m_indents.pop();
		m_tokens.push(new BlockEndToken);
	}
}
// temporary function for testing // temporary function for testing
void Scanner::Scan() void Scanner::Scan()
{ {
while(Token *pToken = ScanNextToken()) while(INPUT)
delete pToken; ScanNextToken();
} }
} }

View file

@ -4,6 +4,7 @@
#include <string> #include <string>
#include <queue> #include <queue>
#include <stack> #include <stack>
#include <set>
namespace YAML namespace YAML
{ {
@ -12,6 +13,19 @@ namespace YAML
namespace Keys namespace Keys
{ {
const char Comment = '#'; const char Comment = '#';
// flow collection indicators (ScanNextToken() dispatches on these)
const char FlowSeqStart = '[';
const char FlowSeqEnd = ']';
const char FlowMapStart = '{';
const char FlowMapEnd = '}';
const char FlowEntry = ',';
// block sequence entry and mapping key/value indicators
// (tested by IsBlockEntry(), IsKey() and IsValue())
const char BlockEntry = '-';
const char Key = '?';
const char Value = ':';
// alias/anchor/tag indicators (not scanned yet - see the TODO in ScanNextToken())
const char Alias = '*';
const char Anchor = '&';
const char Tag = '!';
// literal and folded scalar indicators (ScanNextToken() currently returns
// without producing a token when it sees one of these in block context)
const char LiteralScalar = '|';
const char FoldedScalar = '>';
} }
class Scanner class Scanner
@ -20,8 +34,10 @@ namespace YAML
Scanner(std::istream& in); Scanner(std::istream& in);
~Scanner(); ~Scanner();
Token *ScanNextToken(); void ScanNextToken();
void ScanToNextToken(); void ScanToNextToken();
void PushIndentTo(int column, bool sequence);
void PopIndentTo(int column);
void Scan(); void Scan();
@ -36,6 +52,12 @@ namespace YAML
bool IsBlank(); bool IsBlank();
bool IsDocumentStart(); bool IsDocumentStart();
bool IsDocumentEnd(); bool IsDocumentEnd();
bool IsBlockEntry();
bool IsKey();
bool IsValue();
bool IsPlainScalar();
template <typename T> void ScanAndEnqueue(T *pToken);
template <typename T> T *ScanToken(T *pToken); template <typename T> T *ScanToken(T *pToken);
private: private:
@ -45,6 +67,7 @@ namespace YAML
// the output (tokens) // the output (tokens)
std::queue <Token *> m_tokens; std::queue <Token *> m_tokens;
std::set <Token *> m_limboTokens;
// state info // state info
bool m_startedStream; bool m_startedStream;

17
token.h
View file

@ -3,8 +3,25 @@
namespace YAML namespace YAML
{ {
class Token {}; class Token {};
class StreamStartToken: public Token {}; class StreamStartToken: public Token {};
class StreamEndToken: public Token {}; class StreamEndToken: public Token {};
class DocumentStartToken: public Token {}; class DocumentStartToken: public Token {};
class DocumentEndToken: public Token {}; class DocumentEndToken: public Token {};
// block structure tokens: the start tokens are enqueued by Scanner::PushIndentTo()
// and BlockEndToken by Scanner::PopIndentTo()
class BlockSeqStartToken: public Token {};
class BlockMapStartToken: public Token {};
class BlockEndToken: public Token {};
// enqueued when the scanner is at a block entry indicator (Keys::BlockEntry)
class BlockEntryToken: public Token {};
// flow collection tokens, one per flow indicator character
class FlowSeqStartToken: public Token {};
class FlowMapStartToken: public Token {};
class FlowSeqEndToken: public Token {};
class FlowMapEndToken: public Token {};
class FlowEntryToken: public Token {};
// mapping key/value indicator tokens
class KeyToken: public Token {};
class ValueToken: public Token {};
// plain (unquoted) scalar; carries no data yet
class PlainScalarToken: public Token {};
} }

View file

@ -211,6 +211,10 @@
RelativePath=".\document.h" RelativePath=".\document.h"
> >
</File> </File>
<File
RelativePath=".\exceptions.h"
>
</File>
<File <File
RelativePath=".\map.h" RelativePath=".\map.h"
> >