Refactored the common scalar-scanning code (from plain, quoted, and block scalars) into one function.

This commit is contained in:
Jesse Beder 2008-06-29 05:45:41 +00:00
parent 6c193d6fbd
commit 0d5a97bffe
6 changed files with 306 additions and 188 deletions

View file

@ -75,74 +75,77 @@ namespace YAML
// and in-line whitespace (which is kept) separately.
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
//// now eat and store the scalar
//std::string scalar;
//WhitespaceInfo info;
//while(INPUT) {
// // doc start/end tokens
// if(IsDocumentStart() || IsDocumentEnd())
// break;
// // comment
// if(Exp::Comment.Matches(INPUT))
// break;
// // first eat non-blanks
// while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// // illegal colon in flow context
// if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
// throw IllegalScalar();
// // characters that might end the scalar
// if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
// break;
// if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
// break;
// // finally, read the character!
// scalar += GetChar();
// }
// // did we hit a non-blank character that ended us?
// if(!Exp::BlankOrBreak.Matches(INPUT))
// break;
// // now eat blanks
// while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
// if(Exp::Blank.Matches(INPUT)) {
// // can't use tabs as indentation! only spaces!
// if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
// throw IllegalTabInScalar();
// info.AddBlank(GetChar());
// } else {
// // we know it's a line break; see how many characters to read
// int n = Exp::Break.Match(INPUT);
// std::string line = GetChar(n);
// info.AddBreak(line);
// // and we can't continue a simple key to the next line
// ValidateSimpleKey();
// }
// }
// // break if we're below the indentation level
// if(m_flowLevel == 0 && m_column <= m_indents.top())
// break;
// // finally join whitespace
// scalar += info.Join();
//}
RegEx end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
int indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
pToken->value = ScanScalar(end, false, indent, 0, true, true, true, 0);
m_simpleKeyAllowed = false;
// now eat and store the scalar
std::string scalar;
WhitespaceInfo info;
while(INPUT) {
// doc start/end tokens
if(IsDocumentStart() || IsDocumentEnd())
break;
// comment
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
// finally, read the character!
scalar += GetChar();
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
// can't use tabs as indentation! only spaces!
if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
info.AddBlank(GetChar());
} else {
// we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n);
info.AddBreak(line);
// and we can't continue a simple key to the next line
ValidateSimpleKey();
}
}
// break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
// finally join whitespace
scalar += info.Join();
}
// now modify our token
pToken->value = scalar;
if(info.leadingBlanks)
if(true/*info.leadingBlanks*/)
m_simpleKeyAllowed = true;
return pToken;
@ -151,91 +154,92 @@ namespace YAML
// QuotedScalarToken
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
{
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
m_simpleKeyAllowed = false;
//// now eat and store the scalar
//std::string scalar;
//WhitespaceInfo info;
//while(INPUT) {
// if(IsDocumentStart() || IsDocumentEnd())
// throw DocIndicatorInQuote();
// if(INPUT.peek() == EOF)
// throw EOFInQuote();
// // first eat non-blanks
// while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// // escaped single quote?
// if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) {
// int n = Exp::EscSingleQuote.Match(INPUT);
// scalar += GetChar(n);
// continue;
// }
// // is the quote ending?
// if(INPUT.peek() == quote)
// break;
// // escaped newline?
// if(Exp::EscBreak.Matches(INPUT))
// break;
// // other escape sequence
// if(INPUT.peek() == '\\') {
// int length = 0;
// scalar += Exp::Escape(INPUT, length);
// m_column += length;
// continue;
// }
// // and finally, just add the damn character
// scalar += GetChar();
// }
// // is the quote ending?
// if(INPUT.peek() == quote) {
// // eat and go
// GetChar();
// break;
// }
// // now we eat blanks
// while(Exp::BlankOrBreak.Matches(INPUT)) {
// if(Exp::Blank.Matches(INPUT)) {
// info.AddBlank(GetChar());
// } else {
// // we know it's a line break; see how many characters to read
// int n = Exp::Break.Match(INPUT);
// std::string line = GetChar(n);
// info.AddBreak(line);
// // and we can't continue a simple key to the next line
// ValidateSimpleKey();
// }
// }
// // and finally join the whitespace
// scalar += info.Join();
//}
// eat single or double quote
char quote = GetChar();
pToken->single = (quote == '\'');
// now eat and store the scalar
std::string scalar;
WhitespaceInfo info;
RegEx end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
char escape = (pToken->single ? '\'' : '\\');
while(INPUT) {
if(IsDocumentStart() || IsDocumentEnd())
throw DocIndicatorInQuote();
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
if(INPUT.peek() == EOF)
throw EOFInQuote();
pToken->value = ScanScalar(end, true, 0, escape, true, true, false, 0);
m_simpleKeyAllowed = false;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// escaped single quote?
if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) {
int n = Exp::EscSingleQuote.Match(INPUT);
scalar += GetChar(n);
continue;
}
// is the quote ending?
if(INPUT.peek() == quote)
break;
// escaped newline?
if(Exp::EscBreak.Matches(INPUT))
break;
// other escape sequence
if(INPUT.peek() == '\\') {
int length = 0;
scalar += Exp::Escape(INPUT, length);
m_column += length;
continue;
}
// and finally, just add the damn character
scalar += GetChar();
}
// is the quote ending?
if(INPUT.peek() == quote) {
// eat and go
GetChar();
break;
}
// now we eat blanks
while(Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
info.AddBlank(GetChar());
} else {
// we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n);
info.AddBreak(line);
// and we can't continue a simple key to the next line
ValidateSimpleKey();
}
}
// and finally join the whitespace
scalar += info.Join();
}
pToken->value = scalar;
return pToken;
}
// BlockScalarToken
template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken)
{
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
WhitespaceInfo info;
// eat block indicator ('|' or '>')
@ -268,37 +272,13 @@ namespace YAML
if(info.increment && m_indents.top() >= 0)
indent += m_indents.top();
// finally, grab that scalar
std::string scalar;
while(INPUT) {
// initialize indentation
GetBlockIndentation(indent, info.trailingBreaks);
GetBlockIndentation(indent, info.trailingBreaks);
// are we done with this guy (i.e. at a lower indentation?)
if(m_column != indent)
break;
bool trailingBlank = Exp::Blank.Matches(INPUT);
scalar += info.Join();
bool leadingBlank = Exp::Blank.Matches(INPUT);
// now eat and save the line
while(INPUT.peek() != EOF && !Exp::Break.Matches(INPUT))
scalar += GetChar();
// we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n);
info.AddBreak(line);
}
// one last whitespace join (with chompers this time)
scalar += info.Join(true);
// finally set the scalar
pToken->value = scalar;
bool eatLeadingWhitespace = false;
pToken->value = ScanScalar(RegEx(), false, indent, 0, info.fold, eatLeadingWhitespace, false, info.chomp);
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
return pToken;
}
@ -340,4 +320,104 @@ namespace YAML
indent = 1;
}
}
// ScanScalar
// . Shared scanning loop behind the plain, quoted and block scalar tokens.
// . Reads characters from INPUT into a string, line by line, until one of:
//     - 'end' matches at the current input position,
//     - the input is exhausted,
//     - a non-empty line starts at a column lower than 'indent'.
//
// Parameters:
//   end                  - expression that terminates the scalar when it matches
//   eatEnd               - if true, the matched terminator is consumed, and
//                          reaching EOF before it throws EOFInQuote
//   indent               - column the continuation lines must reach; up to this
//                          many leading spaces are eaten after every line break
//   escape               - character that introduces an escape sequence (handed to
//                          Exp::Escape); if it is '\\', escaped line breaks
//                          (Exp::EscBreak) are swallowed as well
//   fold                 - if true, a line break between two non-empty lines that
//                          are not more indented becomes a single space;
//                          otherwise every line break becomes "\n"
//   eatLeadingWhitespace - if true, blanks after the required indentation are
//                          eaten too
//   trimTrailingSpaces   - if true, trailing ' ' characters are removed from the
//                          result
//   chomp                - 0: keep at most one trailing newline (clip),
//                          -1: remove all trailing newlines (strip),
//                          any other value: leave trailing newlines alone
//
// Returns the scanned scalar text.
std::string Scanner::ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp)
{
	// state of the previously scanned line; needed to decide how to fold
	bool emptyLine = false, moreIndented = false;
	std::string scalar;

	while(INPUT) {
		// ********************************
		// Phase #1: scan until line ending
		while(!end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) {
			if(INPUT.peek() == EOF)
				break;

			// escaped newline? (only if we're escaping on slash)
			if(escape == '\\' && Exp::EscBreak.Matches(INPUT)) {
				int n = Exp::EscBreak.Match(INPUT);
				Eat(n);
				continue;
			}

			// escape this?
			if(INPUT.peek() == escape) {
				// Exp::Escape reads straight from INPUT, so the column has to be
				// advanced by hand using the length it reports
				int length = 0;
				scalar += Exp::Escape(INPUT, length);
				m_column += length;
				continue;
			}

			// otherwise, just add the damn character
			scalar += GetChar();
		}

		// eof? if we're looking to eat something, then we throw
		if(INPUT.peek() == EOF) {
			if(eatEnd)
				throw EOFInQuote();
			break;
		}

		// are we done via character match?
		int n = end.Match(INPUT);
		if(n >= 0) {
			if(eatEnd)
				Eat(n);
			break;
		}

		// ********************************
		// Phase #2: eat line ending
		// (phase #1 only stops on 'end', EOF or a break, so this must be a break)
		n = Exp::Break.Match(INPUT);
		Eat(n);

		// ********************************
		// Phase #3: scan initial spaces

		// first the required indentation
		while(INPUT.peek() == ' ' && m_column < indent)
			Eat(1);

		// and then the rest of the whitespace
		if(eatLeadingWhitespace) {
			while(Exp::Blank.Matches(INPUT))
				Eat(1);
		}

		// was this an empty line?
		bool nextEmptyLine = Exp::Break.Matches(INPUT);
		bool nextMoreIndented = (INPUT.peek() == ' ');

		// a break is folded into a space only between two "ordinary" lines
		if(fold && !emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented)
			scalar += " ";
		else
			scalar += "\n";

		emptyLine = nextEmptyLine;
		moreIndented = nextMoreIndented;

		// are we done via indentation?
		if(!emptyLine && m_column < indent)
			break;
	}

	// post-processing
	// Note: find_last_not_of returns std::string::npos when every character
	// matches, so the result is kept in size_type and npos is tested explicitly
	// rather than relying on truncation/wrap-around of a narrower unsigned.
	if(trimTrailingSpaces) {
		const std::string::size_type pos = scalar.find_last_not_of(' ');
		// a scalar made only of spaces is deliberately left untouched
		if(pos != std::string::npos)
			scalar.erase(pos + 1);
	}

	if(chomp == 0 || chomp == -1) {
		const std::string::size_type pos = scalar.find_last_not_of('\n');
		if(pos == std::string::npos) {
			// nothing but line breaks (or nothing at all): every one of them is
			// a trailing break, so both clip and strip leave an empty scalar
			scalar.clear();
		} else if(chomp == 0) {
			// clip: keep the final line break, drop any further ones
			if(pos + 1 < scalar.size())
				scalar.erase(pos + 2);
		} else {
			// strip: drop all trailing line breaks
			scalar.erase(pos + 1);
		}
	}

	return scalar;
}
}