Split off the specific regular expressions and the specialized token-scanning functions into their own files.

Jesse Beder 2008-06-27 19:13:03 +00:00
parent de29068110
commit 8fca02fb2a
5 changed files with 356 additions and 332 deletions

exp.h (new file, 53 lines added)

@ -0,0 +1,53 @@
#pragma once
#include "regex.h"
namespace YAML
{
////////////////////////////////////////////////////////////////////////////////
// Here we store a bunch of expressions for matching different parts of the file.
namespace Exp
{
// misc
const RegEx Blank = RegEx(' ') || RegEx('\t');
const RegEx Break = RegEx('\n');
const RegEx BlankOrBreak = Blank || Break;
// actual tags
const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
const RegEx Key = RegEx('?'),
KeyInFlow = RegEx('?') + BlankOrBreak;
const RegEx Value = RegEx(':'),
ValueInFlow = RegEx(':') + BlankOrBreak;
const RegEx Comment = RegEx('#');
// Plain scalar rules:
// . Cannot start with a blank.
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
// . In the block context, - ? : must not be followed by a space.
// . In the flow context, ? : are illegal and - must not be followed by a space.
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)),
PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
const RegEx EndScalar = RegEx(':') + BlankOrBreak,
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}");
}
namespace Keys
{
const char FlowSeqStart = '[';
const char FlowSeqEnd = ']';
const char FlowMapStart = '{';
const char FlowMapEnd = '}';
const char FlowEntry = ',';
const char Alias = '*';
const char Anchor = '&';
const char Tag = '!';
const char LiteralScalar = '|';
const char FoldedScalar = '>';
}
}
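
An aside (not part of this commit) for readers new to regex.h: the constants above are composed with a small combinator-style RegEx class rather than a textual pattern syntax. Judging from the usage here, operator|| is alternation, operator+ is concatenation, operator! negates a match, and REGEX_OR makes a string behave as a character class, so DocStart reads as the literal "---" followed by a blank, a line break, EOF, or the end of input. The scanner then tests the current position with a single Matches(INPUT) call, just as scanner.cpp does for Exp::Comment and Exp::PlainScalar below. A hypothetical expression built the same way (AnchorStart is invented purely for illustration and does not exist in the codebase):

#include "exp.h" // also pulls in regex.h

namespace YAML
{
	// Illustration only: an '&' immediately followed by a non-blank character,
	// composed with the same operators as the Exp:: constants above.
	const RegEx AnchorStart = RegEx('&') + !Exp::BlankOrBreak;
}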

scanner.cpp

@ -1,6 +1,7 @@
#include "scanner.h" #include "scanner.h"
#include "token.h" #include "token.h"
#include "exceptions.h" #include "exceptions.h"
#include "exp.h"
#include <iostream> #include <iostream>
namespace YAML namespace YAML
@ -124,9 +125,6 @@ namespace YAML
return Exp::PlainScalar.Matches(INPUT);
}
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// ScanAndEnqueue
// . Scans the token, then pushes it in the queue.
// . Note: we also use a set of "limbo tokens", i.e., tokens
@ -141,290 +139,6 @@ namespace YAML
m_limboTokens.erase(pToken);
}
// StreamStartToken
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
m_startedStream = true;
m_simpleKeyAllowed = true;
m_indents.push(-1);
return pToken;
}
// StreamEndToken
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
// force newline
if(m_column > 0)
m_column = 0;
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
m_endedStream = true;
return pToken;
}
// DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
PopIndentTo(m_column);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// DocumentEndToken
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// FlowSeqStartToken
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowMapStartToken
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowSeqEndToken
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowMapEndToken
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowEntryToken
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
{
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// BlockEntryToken
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
{
// we better be in the block context!
if(m_flowLevel == 0) {
// can we put it here?
if(!m_simpleKeyAllowed)
throw IllegalBlockEntry();
PushIndentTo(m_column, true); // , -1
} else {
// TODO: throw?
}
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// KeyToken
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapKey();
PushIndentTo(m_column, false);
}
// TODO: "remove simple key"
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// ValueToken
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
{
// TODO: Is it a simple key?
if(false) {
} else {
// If not, ...
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapValue();
PushIndentTo(m_column, false);
}
}
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// PlainScalarToken
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
// TODO: "save simple key"
m_simpleKeyAllowed = false;
// now eat and store the scalar
std::string scalar, whitespace, leadingBreaks, trailingBreaks;
bool leadingBlanks = false;
while(INPUT) {
// doc start/end tokens
if(IsDocumentStart() || IsDocumentEnd())
break;
// comment
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
if(leadingBlanks) {
if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') {
// fold line break?
if(trailingBreaks.empty())
scalar += ' ';
else {
scalar += trailingBreaks;
trailingBreaks = "";
}
} else {
scalar += leadingBreaks + trailingBreaks;
leadingBreaks = "";
trailingBreaks = "";
}
} else if(!whitespace.empty()) {
scalar += whitespace;
whitespace = "";
}
// finally, read the character!
scalar += GetChar();
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
// maybe store this character
if(!leadingBlanks)
whitespace += GetChar();
else
Eat(1);
} else {
// where to store this character?
if(!leadingBlanks) {
leadingBlanks = true;
whitespace = "";
leadingBreaks += GetChar();
} else
trailingBreaks += GetChar();
}
}
// and finally break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
}
// now modify our token
pToken->SetValue(scalar);
if(leadingBlanks)
m_simpleKeyAllowed = true;
return pToken;
}
///////////////////////////////////////////////////////////////////////
// The main scanning function
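
Since ScanAndEnqueue stays behind in scanner.cpp while every ScanToken body moves to scantoken.cpp, the two translation units now meet only through explicit specializations of the ScanToken member template. A minimal, self-contained sketch of that C++ pattern follows; the stripped-down class and token types are illustrative stand-ins, not yaml-cpp's actual declarations:

#include <iostream>

// Stand-in token types; yaml-cpp's real tokens live in token.h.
struct StreamStartToken {};
struct KeyToken {};

class Scanner
{
public:
	// One member template is declared here...
	template <typename T> T *ScanToken(T *pToken);
};

// ...and one explicit specialization per token type is defined at namespace
// scope, which is how scantoken.cpp relates to the declaration in scanner.h.
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
	std::cout << "scanning stream start\n";
	return pToken;
}

template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
	std::cout << "scanning key\n";
	return pToken;
}

int main()
{
	Scanner scanner;
	StreamStartToken start;
	scanner.ScanToken(&start); // resolves to the StreamStartToken specialization
	return 0;
}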

scanner.h

@ -5,56 +5,11 @@
#include <queue>
#include <stack>
#include <set>
#include "regex.h"
namespace YAML
{
class Token;
namespace Exp
{
// misc
const RegEx Blank = RegEx(' ') || RegEx('\t');
const RegEx Break = RegEx('\n');
const RegEx BlankOrBreak = Blank || Break;
// actual tags
const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
const RegEx Key = RegEx('?'),
KeyInFlow = RegEx('?') + BlankOrBreak;
const RegEx Value = RegEx(':'),
ValueInFlow = RegEx(':') + BlankOrBreak;
const RegEx Comment = RegEx('#');
// Plain scalar rules:
// . Cannot start with a blank.
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
// . In the block context, - ? : must not be followed by a space.
// . In the flow context, ? : are illegal and - must not be followed by a space.
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)),
PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
const RegEx EndScalar = RegEx(':') + BlankOrBreak,
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}");
}
namespace Keys
{
const char FlowSeqStart = '[';
const char FlowSeqEnd = ']';
const char FlowMapStart = '{';
const char FlowMapEnd = '}';
const char FlowEntry = ',';
const char Alias = '*';
const char Anchor = '&';
const char Tag = '!';
const char LiteralScalar = '|';
const char FoldedScalar = '>';
}
class Scanner
{
public:

scantoken.cpp (new file, 294 lines added)

@ -0,0 +1,294 @@
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
namespace YAML
{
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// StreamStartToken
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
m_startedStream = true;
m_simpleKeyAllowed = true;
m_indents.push(-1);
return pToken;
}
// StreamEndToken
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
// force newline
if(m_column > 0)
m_column = 0;
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
m_endedStream = true;
return pToken;
}
// DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
PopIndentTo(m_column);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// DocumentEndToken
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// FlowSeqStartToken
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowMapStartToken
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowSeqEndToken
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowMapEndToken
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowEntryToken
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
{
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// BlockEntryToken
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
{
// we better be in the block context!
if(m_flowLevel == 0) {
// can we put it here?
if(!m_simpleKeyAllowed)
throw IllegalBlockEntry();
PushIndentTo(m_column, true); // , -1
} else {
// TODO: throw?
}
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// KeyToken
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapKey();
PushIndentTo(m_column, false);
}
// TODO: "remove simple key"
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// ValueToken
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
{
// TODO: Is it a simple key?
if(false) {
} else {
// If not, ...
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapValue();
PushIndentTo(m_column, false);
}
}
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// PlainScalarToken
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
// TODO: "save simple key"
m_simpleKeyAllowed = false;
// now eat and store the scalar
std::string scalar, whitespace, leadingBreaks, trailingBreaks;
bool leadingBlanks = false;
while(INPUT) {
// doc start/end tokens
if(IsDocumentStart() || IsDocumentEnd())
break;
// comment
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
if(leadingBlanks) {
if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') {
// fold line break?
if(trailingBreaks.empty())
scalar += ' ';
else {
scalar += trailingBreaks;
trailingBreaks = "";
}
} else {
scalar += leadingBreaks + trailingBreaks;
leadingBreaks = "";
trailingBreaks = "";
}
} else if(!whitespace.empty()) {
scalar += whitespace;
whitespace = "";
}
// finally, read the character!
scalar += GetChar();
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
// maybe store this character
if(!leadingBlanks)
whitespace += GetChar();
else
Eat(1);
} else {
// where to store this character?
if(!leadingBlanks) {
leadingBlanks = true;
whitespace = "";
leadingBreaks += GetChar();
} else
trailingBreaks += GetChar();
}
}
// and finally break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
}
// now modify our token
pToken->SetValue(scalar);
if(leadingBlanks)
m_simpleKeyAllowed = true;
return pToken;
}
}
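
A note on the plain-scalar loop above: the whitespace/leadingBreaks/trailingBreaks bookkeeping implements YAML's line folding for multi-line plain scalars. A single line break between two non-empty lines is meant to fold into one space (the scalar += ' ' branch), while each additional blank line should survive as a literal newline (the branch that appends trailingBreaks). For example, a plain scalar spread over the lines "foo", "bar", a blank line, and "baz" at the same indentation is expected to scan to the value "foo bar\nbaz"; this expectation comes from YAML's folding rules, which these branches are written against, not from anything stated in the commit itself.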

Visual C++ project file (.vcproj)

@ -197,6 +197,10 @@
RelativePath=".\scanner.cpp" RelativePath=".\scanner.cpp"
> >
</File> </File>
<File
RelativePath=".\scantoken.cpp"
>
</File>
<File
RelativePath=".\sequence.cpp"
>
@ -219,6 +223,10 @@
RelativePath=".\exceptions.h" RelativePath=".\exceptions.h"
> >
</File> </File>
<File
RelativePath=".\exp.h"
>
</File>
<File
RelativePath=".\map.h"
>