rpcs3/scantoken.cpp

408 lines
9 KiB
C++

#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
namespace YAML
{
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// StreamStartToken
// . Seeds the indentation stack with -1, flags the stream as started and
//   allows a simple key right away. Consumes no input.
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
    m_indents.push(-1);

    m_simpleKeyAllowed = true;
    m_startedStream = true;

    return pToken;
}
// StreamEndToken
// . Resets the column, unwinds the indentation to -1, validates all pending
//   simple keys and flags the stream as ended. Consumes no input.
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
    // act as though the last line had been terminated
    if(0 < m_column) {
        m_column = 0;
    }

    PopIndentTo(-1);
    ValidateAllSimpleKeys();

    m_endedStream = true;
    m_simpleKeyAllowed = false;

    return pToken;
}
// DocumentStartToken
// . A document-start marker terminates everything that is still open, so the
//   indentation is unwound all the way to -1 (exactly as for the document-end
//   and stream-end tokens) rather than only to the current column, which
//   could leave a collection at the marker's own column open.
// . Afterwards all pending simple keys are validated, simple keys are
//   disallowed, and the three-character marker is eaten.
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
    PopIndentTo(-1);
    ValidateAllSimpleKeys();
    m_simpleKeyAllowed = false;

    // eat the marker
    Eat(3);
    return pToken;
}
// DocumentEndToken
// . Unwinds the indentation to -1, validates all pending simple keys,
//   disallows simple keys and eats the three-character marker.
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{
    const int markerLength = 3;

    PopIndentTo(-1);
    ValidateAllSimpleKeys();
    m_simpleKeyAllowed = false;

    Eat(markerLength);
    return pToken;
}
// FlowSeqStartToken
// . Opens one more flow level and eats the one-character indicator.
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{
    // a flow sequence may itself act as a simple key, so register it first
    InsertSimpleKey();

    IncreaseFlowLevel();
    m_simpleKeyAllowed = true;

    // eat the indicator
    Eat(1);
    return pToken;
}
// FlowMapStartToken
// . Opens one more flow level and eats the one-character indicator.
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
    // a flow map may itself act as a simple key, so register it first
    InsertSimpleKey();

    IncreaseFlowLevel();
    m_simpleKeyAllowed = true;

    // eat the indicator
    Eat(1);
    return pToken;
}
// FlowSeqEndToken
// . Closes one flow level, disallows simple keys and eats the one-character
//   indicator.
// . Simple-key validation is currently switched off here:
//     ValidateSimpleKey();
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{
    DecreaseFlowLevel();
    m_simpleKeyAllowed = false;

    // eat the indicator
    Eat(1);
    return pToken;
}
// FlowMapEndToken
// . Closes one flow level, disallows simple keys and eats the one-character
//   indicator.
// . Simple-key validation is currently switched off here:
//     ValidateSimpleKey();
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
    DecreaseFlowLevel();
    m_simpleKeyAllowed = false;

    // eat the indicator
    Eat(1);
    return pToken;
}
// FlowEntryToken
// . Re-allows simple keys and eats the one-character indicator.
// . Simple-key validation is currently switched off here:
//     ValidateSimpleKey();
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
{
    m_simpleKeyAllowed = true;

    // eat the indicator
    Eat(1);
    return pToken;
}
// BlockEntryToken
// . In block context (flow level 0) the entry is only legal where a simple key
//   would be; otherwise IllegalBlockEntry is thrown. A legal entry pushes the
//   current column as a new indentation.
// . Simple-key validation is currently switched off here:
//     ValidateSimpleKey();
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
{
    const bool inBlockContext = (m_flowLevel == 0);

    if(inBlockContext) {
        // can we put it here?
        if(!m_simpleKeyAllowed) {
            throw IllegalBlockEntry();
        }

        PushIndentTo(m_column, true);  // , -1
    }
    // TODO: throw when we are not in block context?

    m_simpleKeyAllowed = true;

    // eat the indicator
    Eat(1);
    return pToken;
}
// KeyToken
// . In block context (flow level 0) a key is only legal where a simple key
//   would be; otherwise IllegalMapKey is thrown. A legal key pushes the
//   current column as a new indentation.
// . Afterwards a simple key is allowed only if we are in block context.
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
    if(m_flowLevel == 0) {
        if(!m_simpleKeyAllowed) {
            throw IllegalMapKey();
        }

        PushIndentTo(m_column, false);
    }

    // TODO: "remove simple key"

    // a simple key may only follow when we're in block context
    m_simpleKeyAllowed = (m_flowLevel == 0);

    // eat the indicator
    Eat(1);
    return pToken;
}
// ValueToken
// . When the last simple key was valid, the value simply follows it and no
//   further simple key is allowed.
// . Otherwise this behaves like a key token: in block context the value must
//   sit where a simple key would be legal (else IllegalMapValue is thrown),
//   the current column is pushed as an indentation, and a simple key is
//   allowed afterwards only in block context.
// . The old explicit check is switched off:
//     bool isValidKey = ValidateSimpleKey();
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
{
    if(!m_isLastKeyValid) {
        // not following a simple key; are we in block context?
        if(m_flowLevel == 0) {
            if(!m_simpleKeyAllowed) {
                throw IllegalMapValue();
            }

            PushIndentTo(m_column, false);
        }

        // a simple key may only follow when we're in block context
        m_simpleKeyAllowed = (m_flowLevel == 0);
    } else {
        // can't follow a simple key with another simple key (dunno why, though - it seems fine)
        m_simpleKeyAllowed = false;
    }

    // eat the indicator
    Eat(1);
    return pToken;
}
// PlainScalarToken
// . We scan these in passes of two steps each: first, grab all the
//   non-whitespace characters we can, and then grab all the whitespace
//   characters we can.
// . This lets us handle leading whitespace (which is chomped) and in-line
//   whitespace (which is kept) separately.
// . Whitespace collected in one pass is held back and only appended once more
//   scalar content actually follows it. That way the value never ends in
//   whitespace that merely preceded a comment, an end-of-scalar indicator, a
//   document marker or the end of the input.
// . Throws IllegalScalar for an illegal colon in flow context and
//   IllegalTabInScalar when a tab is used as indentation.
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
    // insert a potential simple key
    InsertSimpleKey();
    m_simpleKeyAllowed = false;

    // now eat and store the scalar
    std::string scalar;
    std::string pendingWhitespace;  // joined whitespace not yet committed to 'scalar'
    WhitespaceInfo info;

    while(INPUT) {
        // doc start/end tokens
        if(IsDocumentStart() || IsDocumentEnd())
            break;

        // comment
        if(Exp::Comment.Matches(INPUT))
            break;

        // first eat non-blanks
        while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
            // the end of the input ends the scalar; never read past it
            if(INPUT.peek() == EOF)
                break;

            // illegal colon in flow context
            if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
                throw IllegalScalar();

            // characters that might end the scalar
            if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
                break;
            if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
                break;

            // more content follows, so the held-back whitespace is in-line
            // whitespace and belongs to the value
            scalar += pendingWhitespace;
            pendingWhitespace.clear();

            // finally, read the character!
            scalar += GetChar();
        }

        // did we hit a non-blank character that ended us?
        if(!Exp::BlankOrBreak.Matches(INPUT))
            break;

        // now eat blanks
        while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
            if(Exp::Blank.Matches(INPUT)) {
                // can't use tabs as indentation! only spaces!
                if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
                    throw IllegalTabInScalar();

                info.AddBlank(GetChar());
            } else {
                // we know it's a line break; see how many characters to read
                int n = Exp::Break.Match(INPUT);
                std::string line = GetChar(n);
                info.AddBreak(line);

                // and we can't continue a simple key to the next line
                ValidateSimpleKey();
            }
        }

        // break if we're below the indentation level
        if(m_flowLevel == 0 && m_column <= m_indents.top())
            break;

        // finally join the whitespace, but hold it back until we know that
        // the scalar really continues
        pendingWhitespace = info.Join();
    }

    // now modify our token
    pToken->value = scalar;
    if(info.leadingBlanks)
        m_simpleKeyAllowed = true;

    return pToken;
}
// QuotedScalarToken
// . Scans a single- or double-quoted scalar, from the opening quote through
//   the matching closing quote, and stores the resulting text in the token.
// . Single-quoted: the only escape is the doubled quote, which stands for one
//   literal quote character; a backslash is an ordinary character.
// . Double-quoted: backslash escapes are decoded through Exp::Escape, and an
//   escaped line break is dropped together with the blanks that lead the
//   following line.
// . Line breaks and blanks between words are folded through WhitespaceInfo.
// . Throws DocIndicatorInQuote if a document marker shows up inside the
//   quotes, and EOFInQuote if the input runs out before the closing quote.
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
{
    // insert a potential simple key
    InsertSimpleKey();
    m_simpleKeyAllowed = false;

    // eat single or double quote
    char quote = GetChar();
    bool single = (quote == '\'');
    const char endQuote = (single ? '\'' : '\"');
    bool closed = false;

    // now eat and store the scalar
    std::string scalar;
    WhitespaceInfo info;

    while(INPUT) {
        if(IsDocumentStart() || IsDocumentEnd())
            throw DocIndicatorInQuote();

        if(INPUT.peek() == EOF)
            throw EOFInQuote();

        // first eat non-blanks
        while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
            // running out of input in the middle of a word means the quote
            // was never closed
            if(INPUT.peek() == EOF)
                throw EOFInQuote();

            // escaped single quote? it stands for exactly one quote character
            if(single && Exp::EscSingleQuote.Matches(INPUT)) {
                int n = Exp::EscSingleQuote.Match(INPUT);
                GetChar(n);
                scalar += '\'';
                continue;
            }

            // is the quote ending?
            if(INPUT.peek() == endQuote)
                break;

            // backslash escapes only exist in double-quoted scalars
            if(!single) {
                // escaped newline? consume it (so that we always make
                // progress) along with the next line's leading blanks;
                // none of it becomes part of the value
                if(Exp::EscBreak.Matches(INPUT)) {
                    int n = Exp::EscBreak.Match(INPUT);
                    GetChar(n);
                    while(INPUT && Exp::Blank.Matches(INPUT))
                        GetChar();

                    // and we can't continue a simple key to the next line
                    ValidateSimpleKey();
                    continue;
                }

                // other escape sequence
                if(INPUT.peek() == '\\') {
                    int length = 0;
                    scalar += Exp::Escape(INPUT, length);
                    m_column += length;
                    continue;
                }
            }

            // and finally, just add the character
            scalar += GetChar();
        }

        // is the quote ending?
        if(INPUT.peek() == endQuote) {
            // eat and go
            GetChar();
            closed = true;
            break;
        }

        // now we eat blanks
        while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
            if(Exp::Blank.Matches(INPUT)) {
                info.AddBlank(GetChar());
            } else {
                // we know it's a line break; see how many characters to read
                int n = Exp::Break.Match(INPUT);
                std::string line = GetChar(n);
                info.AddBreak(line);

                // and we can't continue a simple key to the next line
                ValidateSimpleKey();
            }
        }

        // and finally join the whitespace
        scalar += info.Join();
    }

    // leaving the loop without having seen the closing quote means the input
    // gave out on us; don't hand back a half-read scalar
    if(!closed)
        throw EOFInQuote();

    pToken->value = scalar;
    return pToken;
}
//////////////////////////////////////////////////////////
// WhitespaceInfo stuff
// Starts out with the leadingBlanks flag cleared; AddBreak raises it.
Scanner::WhitespaceInfo::WhitespaceInfo()
    : leadingBlanks(false)
{
}
// Records one blank character. Blanks are only kept while leadingBlanks is
// not set; once it is set they are discarded.
void Scanner::WhitespaceInfo::AddBlank(char ch)
{
    if(leadingBlanks)
        return;

    whitespace += ch;
}
// Records one line break.
// . The first break sets leadingBlanks, throws away the blanks collected so
//   far and is stored in leadingBreaks.
// . Every later break is appended to trailingBreaks.
void Scanner::WhitespaceInfo::AddBreak(const std::string& line)
{
    if(leadingBlanks) {
        trailingBreaks += line;
        return;
    }

    leadingBlanks = true;
    whitespace.clear();
    leadingBreaks += line;
}
// Returns the text that the whitespace collected so far contributes to a
// scalar, and resets the part of the state that was used.
// . Without a line break: the collected blanks (possibly empty).
// . With a line break: if leadingBreaks matches Exp::Break, the break is
//   folded into a single space when no further breaks followed, or replaced
//   by those trailing breaks otherwise; if it does not match, leading and
//   trailing breaks are returned verbatim.
std::string Scanner::WhitespaceInfo::Join()
{
    if(!leadingBlanks) {
        // only in-line blanks (or nothing at all) were collected
        std::string blanks = whitespace;
        whitespace.clear();
        return blanks;
    }

    std::string joined;
    if(!Exp::Break.Matches(leadingBreaks))
        joined = leadingBreaks + trailingBreaks;
    else if(trailingBreaks.empty())
        joined = " ";  // fold the line break
    else
        joined = trailingBreaks;

    leadingBlanks = false;
    leadingBreaks.clear();
    trailingBreaks.clear();
    return joined;
}
}