Instead of deriving different tokens from a base Token class, we now use an enumerated TOKEN_TYPE to distinguish types. This is so we don't have to cast all the time when parsing the resulting token stream.

Also, removed start/end stream tokens.
This commit is contained in:
Jesse Beder 2008-06-30 21:47:21 +00:00
parent b6a0ef207b
commit 852e5b63e5
6 changed files with 201 additions and 214 deletions

View file

@ -8,9 +8,6 @@ namespace YAML
{
Parser::Parser(std::istream& in): m_scanner(in)
{
// eat the stream start token
// TODO: check?
Token *pToken = m_scanner.GetNextToken();
}
Parser::~Parser()
@ -25,7 +22,7 @@ namespace YAML
if(!pToken)
break;
std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
std::cout << *pToken << std::endl;
delete pToken;
}
getchar();

View file

@ -16,10 +16,6 @@ namespace YAML
delete m_tokens.front();
m_tokens.pop();
}
// delete limbo tokens (they're here for RAII)
for(std::set <Token *>::const_iterator it=m_limboTokens.begin();it!=m_limboTokens.end();++it)
delete *it;
}
// GetNextToken
@ -78,7 +74,7 @@ namespace YAML
return;
if(!m_startedStream)
return ScanAndEnqueue(new StreamStartToken);
return StartStream();
// get rid of whitespace, etc. (in between tokens it should be irrelevant)
ScanToNextToken();
@ -95,62 +91,56 @@ namespace YAML
// end of stream
if(INPUT.peek() == EOF)
return ScanAndEnqueue(new StreamEndToken);
return EndStream();
if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)
return ScanAndEnqueue(new DirectiveToken);
return ScanDirective();
// document token
if(INPUT.column == 0 && Exp::DocStart.Matches(INPUT))
return ScanAndEnqueue(new DocumentStartToken);
return ScanDocStart();
if(INPUT.column == 0 && Exp::DocEnd.Matches(INPUT))
return ScanAndEnqueue(new DocumentEndToken);
return ScanDocEnd();
// flow start/end/entry
if(INPUT.peek() == Keys::FlowSeqStart)
return ScanAndEnqueue(new FlowSeqStartToken);
if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
return ScanFlowStart();
if(INPUT.peek() == Keys::FlowSeqEnd)
return ScanAndEnqueue(new FlowSeqEndToken);
if(INPUT.peek() == Keys::FlowMapStart)
return ScanAndEnqueue(new FlowMapStartToken);
if(INPUT.peek() == Keys::FlowMapEnd)
return ScanAndEnqueue(new FlowMapEndToken);
if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
return ScanFlowEnd();
if(INPUT.peek() == Keys::FlowEntry)
return ScanAndEnqueue(new FlowEntryToken);
return ScanFlowEntry();
// block/map stuff
if(Exp::BlockEntry.Matches(INPUT))
return ScanAndEnqueue(new BlockEntryToken);
return ScanBlockEntry();
if((m_flowLevel == 0 ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
return ScanAndEnqueue(new KeyToken);
return ScanKey();
if((m_flowLevel == 0 ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
return ScanAndEnqueue(new ValueToken);
return ScanValue();
// alias/anchor
if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
return ScanAndEnqueue(new AnchorToken);
return ScanAnchorOrAlias();
// tag
if(INPUT.peek() == Keys::Tag)
return ScanAndEnqueue(new TagToken);
return ScanTag();
// special scalars
if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
return ScanAndEnqueue(new BlockScalarToken);
return ScanBlockScalar();
if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
return ScanAndEnqueue(new QuotedScalarToken);
return ScanQuotedScalar();
// plain scalars
if((m_flowLevel == 0 ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
return ScanAndEnqueue(new PlainScalarToken);
return ScanPlainScalar();
// don't know what it is!
throw UnknownToken();
@ -210,18 +200,28 @@ namespace YAML
return false;
}
// ScanAndEnqueue
// . Scans the token, then pushes it in the queue.
// . Note: we also use a set of "limbo tokens", i.e., tokens
// that haven't yet been pushed. This way, if ScanToken()
// throws an exception, we'll be keeping track of 'pToken'
// somewhere, and it will be automatically cleaned up when
// the Scanner destructs.
template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
// StartStream
// . Set the initial conditions for starting a stream.
void Scanner::StartStream()
{
m_limboTokens.insert(pToken);
m_tokens.push(ScanToken(pToken));
m_limboTokens.erase(pToken);
m_startedStream = true;
m_simpleKeyAllowed = true;
m_indents.push(-1);
}
// EndStream
// . Close out the stream, finish up, etc.
// . Called once the input is exhausted: unwinds all open block levels,
//   resolves any pending simple keys, and records that the stream ended
//   so the token loop knows not to scan further.
void Scanner::EndStream()
{
// force newline
// (input may end mid-line; pretending we are back at column 0 lets
// PopIndentTo treat every open indent level as closed)
if(INPUT.column > 0)
INPUT.column = 0;
// pop back to the sentinel indent (-1) pushed at stream start,
// emitting a TT_BLOCK_END token for each level popped
PopIndentTo(-1);
// flush/validate any simple keys still awaiting verification
VerifyAllSimpleKeys();
m_simpleKeyAllowed = false;
m_endedStream = true;
}
// PushIndentTo
@ -241,9 +241,9 @@ namespace YAML
// now push
m_indents.push(column);
if(sequence)
m_tokens.push(new BlockSeqStartToken);
m_tokens.push(new Token(TT_BLOCK_SEQ_START));
else
m_tokens.push(new BlockMapStartToken);
m_tokens.push(new Token(TT_BLOCK_MAP_START));
return m_tokens.front();
}
@ -260,7 +260,7 @@ namespace YAML
// now pop away
while(!m_indents.empty() && m_indents.top() > column) {
m_indents.pop();
m_tokens.push(new BlockEndToken);
m_tokens.push(new Token(TT_BLOCK_END));
}
}
}

View file

@ -23,6 +23,8 @@ namespace YAML
private:
// scanning
void StartStream();
void EndStream();
void ScanNextToken();
void ScanToNextToken();
Token *PushIndentTo(int column, bool sequence);
@ -46,8 +48,24 @@ namespace YAML
Token *pMapStart, *pKey;
};
template <typename T> void ScanAndEnqueue(T *pToken);
template <typename T> T *ScanToken(T *pToken);
// and the tokens
void ScanDirective();
void ScanDocStart();
void ScanDocEnd();
void ScanBlockSeqStart();
void ScanBlockMapSTart();
void ScanBlockEnd();
void ScanBlockEntry();
void ScanFlowStart();
void ScanFlowEnd();
void ScanFlowEntry();
void ScanKey();
void ScanValue();
void ScanAnchorOrAlias();
void ScanTag();
void ScanPlainScalar();
void ScanQuotedScalar();
void ScanBlockScalar();
private:
// the stream
@ -55,7 +73,6 @@ namespace YAML
// the output (tokens)
std::queue <Token *> m_tokens;
std::set <Token *> m_limboTokens;
// state info
bool m_startedStream, m_endedStream;

View file

@ -9,36 +9,13 @@ namespace YAML
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// StreamStartToken
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
m_startedStream = true;
m_simpleKeyAllowed = true;
m_indents.push(-1);
return pToken;
}
// StreamEndToken
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
// force newline
if(INPUT.column > 0)
INPUT.column = 0;
PopIndentTo(-1);
VerifyAllSimpleKeys();
m_simpleKeyAllowed = false;
m_endedStream = true;
return pToken;
}
// DirectiveToken
// Directive
// . Note: no semantic checking is done here (that's for the parser to do)
template <> DirectiveToken *Scanner::ScanToken(DirectiveToken *pToken)
void Scanner::ScanDirective()
{
std::string name;
std::vector <std::string> params;
// pop indents and simple keys
PopIndentTo(-1);
VerifyAllSimpleKeys();
@ -50,7 +27,7 @@ namespace YAML
// read name
while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
pToken->name += INPUT.GetChar();
name += INPUT.GetChar();
// read parameters
while(1) {
@ -67,14 +44,17 @@ namespace YAML
while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
param += INPUT.GetChar();
pToken->params.push_back(param);
params.push_back(param);
}
return pToken;
Token *pToken = new Token(TT_DIRECTIVE);
pToken->value = name;
pToken->params = params;
m_tokens.push(pToken);
}
// DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
// DocStart
void Scanner::ScanDocStart()
{
PopIndentTo(INPUT.column);
VerifyAllSimpleKeys();
@ -82,11 +62,11 @@ namespace YAML
// eat
INPUT.Eat(3);
return pToken;
m_tokens.push(new Token(TT_DOC_START));
}
// DocumentEndToken
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
// DocEnd
void Scanner::ScanDocEnd()
{
PopIndentTo(-1);
VerifyAllSimpleKeys();
@ -94,37 +74,25 @@ namespace YAML
// eat
INPUT.Eat(3);
return pToken;
m_tokens.push(new Token(TT_DOC_END));
}
// FlowSeqStartToken
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
// FlowStart
void Scanner::ScanFlowStart()
{
// flow sequences can be simple keys
// flows can be simple keys
InsertSimpleKey();
m_flowLevel++;
m_simpleKeyAllowed = true;
// eat
INPUT.Eat(1);
return pToken;
char ch = INPUT.GetChar();
TOKEN_TYPE type = (ch == Keys::FlowSeqStart ? TT_FLOW_SEQ_START : TT_FLOW_MAP_START);
m_tokens.push(new Token(type));
}
// FlowMapStartToken
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
// flow maps can be simple keys
InsertSimpleKey();
m_flowLevel++;
m_simpleKeyAllowed = true;
// eat
INPUT.Eat(1);
return pToken;
}
// FlowSeqEndToken
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
// FlowEnd
void Scanner::ScanFlowEnd()
{
if(m_flowLevel == 0)
throw IllegalFlowEnd();
@ -133,36 +101,23 @@ namespace YAML
m_simpleKeyAllowed = false;
// eat
INPUT.Eat(1);
return pToken;
char ch = INPUT.GetChar();
TOKEN_TYPE type = (ch == Keys::FlowSeqEnd ? TT_FLOW_SEQ_END : TT_FLOW_MAP_END);
m_tokens.push(new Token(type));
}
// FlowMapEndToken
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
if(m_flowLevel == 0)
throw IllegalFlowEnd();
m_flowLevel--;
m_simpleKeyAllowed = false;
// eat
INPUT.Eat(1);
return pToken;
}
// FlowEntryToken
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
// FlowEntry
void Scanner::ScanFlowEntry()
{
m_simpleKeyAllowed = true;
// eat
INPUT.Eat(1);
return pToken;
m_tokens.push(new Token(TT_FLOW_ENTRY));
}
// BlockEntryToken
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
// BlockEntry
void Scanner::ScanBlockEntry()
{
// we better be in the block context!
if(m_flowLevel > 0)
@ -177,11 +132,11 @@ namespace YAML
// eat
INPUT.Eat(1);
return pToken;
m_tokens.push(new Token(TT_BLOCK_ENTRY));
}
// KeyToken
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
// Key
void Scanner::ScanKey()
{
// handle keys differently in the block context (and manage indents)
if(m_flowLevel == 0) {
@ -199,11 +154,11 @@ namespace YAML
// eat
INPUT.Eat(1);
return pToken;
m_tokens.push(new Token(TT_KEY));
}
// ValueToken
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
// Value
void Scanner::ScanValue()
{
// does this follow a simple key?
if(m_isLastKeyValid) {
@ -227,12 +182,15 @@ namespace YAML
// eat
INPUT.Eat(1);
return pToken;
m_tokens.push(new Token(TT_VALUE));
}
// AnchorToken
template <> AnchorToken *Scanner::ScanToken(AnchorToken *pToken)
// AnchorOrAlias
void Scanner::ScanAnchorOrAlias()
{
bool alias;
std::string tag;
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
@ -240,10 +198,9 @@ namespace YAML
// eat the indicator
char indicator = INPUT.GetChar();
pToken->alias = (indicator == Keys::Alias);
alias = (indicator == Keys::Alias);
// now eat the content
std::string tag;
while(Exp::AlphaNumeric.Matches(INPUT))
tag += INPUT.GetChar();
@ -256,13 +213,16 @@ namespace YAML
throw IllegalCharacterInAnchor();
// and we're done
Token *pToken = new Token(alias ? TT_ALIAS : TT_ANCHOR);
pToken->value = tag;
return pToken;
m_tokens.push(pToken);
}
// TagToken
template <> TagToken *Scanner::ScanToken(TagToken *pToken)
// Tag
void Scanner::ScanTag()
{
std::string handle, suffix;
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
@ -273,7 +233,7 @@ namespace YAML
// read the handle
while(INPUT.peek() != EOF && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
pToken->handle += INPUT.GetChar();
handle += INPUT.GetChar();
// is there a suffix?
if(INPUT.peek() == Keys::Tag) {
@ -282,15 +242,20 @@ namespace YAML
// then read it
while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
pToken->suffix += INPUT.GetChar();
suffix += INPUT.GetChar();
}
return pToken;
Token *pToken = new Token(TT_TAG);
pToken->value = handle;
pToken->params.push_back(suffix);
m_tokens.push(pToken);
}
// PlainScalarToken
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
// PlainScalar
void Scanner::ScanPlainScalar()
{
std::string scalar;
// set up the scanning parameters
ScanScalarParams params;
params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
@ -307,7 +272,7 @@ namespace YAML
if(m_simpleKeyAllowed)
InsertSimpleKey();
pToken->value = ScanScalar(INPUT, params);
scalar = ScanScalar(INPUT, params);
// can have a simple key only if we ended the scalar by starting a new line
m_simpleKeyAllowed = params.leadingSpaces;
@ -317,21 +282,25 @@ namespace YAML
if(Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
return pToken;
Token *pToken = new Token(TT_SCALAR);
pToken->value = scalar;
m_tokens.push(pToken);
}
// QuotedScalarToken
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
// QuotedScalar
void Scanner::ScanQuotedScalar()
{
std::string scalar;
// eat single or double quote
char quote = INPUT.GetChar();
pToken->single = (quote == '\'');
bool single = (quote == '\'');
// setup the scanning parameters
ScanScalarParams params;
params.end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
params.eatEnd = true;
params.escape = (pToken->single ? '\'' : '\\');
params.escape = (single ? '\'' : '\\');
params.indent = 0;
params.fold = true;
params.eatLeadingWhitespace = true;
@ -343,18 +312,22 @@ namespace YAML
if(m_simpleKeyAllowed)
InsertSimpleKey();
pToken->value = ScanScalar(INPUT, params);
scalar = ScanScalar(INPUT, params);
m_simpleKeyAllowed = false;
return pToken;
Token *pToken = new Token(TT_SCALAR);
pToken->value = scalar;
m_tokens.push(pToken);
}
// BlockScalarToken
// . These need a little extra processing beforehand.
// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
// and then we need to figure out what level of indentation we'll be using.
template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken)
void Scanner::ScanBlockScalar()
{
std::string scalar;
ScanScalarParams params;
params.indent = 1;
params.detectIndent = true;
@ -401,10 +374,13 @@ namespace YAML
params.trimTrailingSpaces = false;
params.onTabInIndentation = THROW;
pToken->value = ScanScalar(INPUT, params);
scalar = ScanScalar(INPUT, params);
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
return pToken;
Token *pToken = new Token(TT_SCALAR);
pToken->value = scalar;
m_tokens.push(pToken);
}
}

View file

@ -44,7 +44,7 @@ namespace YAML
// key.required = true; // TODO: is this correct?
// then add the (now unverified) key
key.pKey = new KeyToken;
key.pKey = new Token(TT_KEY);
key.pKey->status = TS_UNVERIFIED;
m_tokens.push(key.pKey);

111
token.h
View file

@ -7,64 +7,61 @@
namespace YAML
{
// Validation state of a queued token: simple keys are pushed as
// TS_UNVERIFIED and later resolved to TS_VALID or TS_INVALID.
enum TOKEN_STATUS { TS_VALID, TS_INVALID, TS_UNVERIFIED };
// Every kind of token the scanner can emit. Replaces the old
// one-subclass-per-token hierarchy so the parser can switch on the
// type instead of casting.
enum TOKEN_TYPE {
TT_DIRECTIVE,
TT_DOC_START,
TT_DOC_END,
TT_BLOCK_SEQ_START,
TT_BLOCK_MAP_START,
TT_BLOCK_END,
TT_BLOCK_ENTRY,
TT_FLOW_SEQ_START,
TT_FLOW_MAP_START,
TT_FLOW_SEQ_END,
TT_FLOW_MAP_END,
TT_FLOW_ENTRY,
TT_KEY,
TT_VALUE,
TT_ANCHOR,
TT_ALIAS,
TT_TAG,
TT_SCALAR,
};
// Display names indexed by TOKEN_TYPE (used by operator<< on Token).
// INVARIANT: must stay in exactly the same order as TOKEN_TYPE above —
// adding or reordering an enumerator requires the same change here.
// NOTE(review): a namespace-scope const array in a header has internal
// linkage, so each translation unit that includes this gets its own
// copy of these strings — harmless but wasteful; consider an accessor
// function instead. TODO confirm against project conventions.
const std::string TokenNames[] = {
"DIRECTIVE",
"DOC_START",
"DOC_END",
"BLOCK_SEQ_START",
"BLOCK_MAP_START",
"BLOCK_END",
"BLOCK_ENTRY",
"FLOW_SEQ_START",
"FLOW_MAP_START",
"FLOW_SEQ_END",
"FLOW_MAP_END",
"FLOW_ENTRY",
"KEY",
"VALUE",
"ANCHOR",
"ALIAS",
"TAG",
"SCALAR",
};
struct Token {
Token(): status(TS_VALID) {}
virtual ~Token() {}
virtual void Write(std::ostream& out) const {}
Token(TOKEN_TYPE type_): status(TS_VALID), type(type_) {}
friend std::ostream& operator << (std::ostream& out, const Token& token) { token.Write(out); return out; }
TOKEN_STATUS status;
};
struct StreamStartToken: public Token {};
struct StreamEndToken: public Token {};
struct DirectiveToken: public Token {
std::string name;
std::vector <std::string> params;
virtual void Write(std::ostream& out) const { out << name; for(unsigned i=0;i<params.size();i++) out << " " << params[i]; }
};
struct DocumentStartToken: public Token {};
struct DocumentEndToken: public Token {};
struct BlockSeqStartToken: public Token {};
struct BlockMapStartToken: public Token {};
struct BlockEndToken: public Token {};
struct BlockEntryToken: public Token {};
struct FlowSeqStartToken: public Token {};
struct FlowMapStartToken: public Token {};
struct FlowSeqEndToken: public Token {};
struct FlowMapEndToken: public Token {};
struct FlowEntryToken: public Token {};
struct KeyToken: public Token {};
struct ValueToken: public Token {};
struct AnchorToken: public Token {
bool alias;
std::string value;
virtual void Write(std::ostream& out) const { out << (alias ? '*' : '&') << value; }
};
struct TagToken: public Token {
std::string handle, suffix;
virtual void Write(std::ostream& out) const { out << "!" << handle << "!" << suffix; }
};
struct ScalarToken: public Token {
std::string value;
virtual void Write(std::ostream& out) const { out << value; }
};
struct PlainScalarToken: public ScalarToken {};
struct QuotedScalarToken: public ScalarToken {
bool single;
virtual void Write(std::ostream& out) const { out << (single ? '\'' : '\"') << value << (single ? '\'' : '\"'); }
};
struct BlockScalarToken: public ScalarToken {};
friend std::ostream& operator << (std::ostream& out, const Token& token) {
out << TokenNames[token.type] << ": " << token.value;
for(unsigned i=0;i<token.params.size();i++)
out << " " << token.params[i];
return out;
}
TOKEN_STATUS status;
TOKEN_TYPE type;
std::string value;
std::vector <std::string> params;
};
}