diff --git a/scanner.h b/scanner.h index 037b4b8072..7f06269c55 100644 --- a/scanner.h +++ b/scanner.h @@ -45,20 +45,6 @@ namespace YAML void GetBlockIndentation(int& indent, std::string& breaks); - struct WhitespaceInfo { - WhitespaceInfo(); - - void SetChompers(char ch); - void AddBlank(char ch); - void AddBreak(const std::string& line); - std::string Join(bool lastline = false); - - bool leadingBlanks; - bool fold; - std::string whitespace, leadingBreaks, trailingBreaks; - int chomp, increment; - }; - struct SimpleKey { SimpleKey(int pos_, int line_, int column_, int flowLevel_); diff --git a/scanscalar.cpp b/scanscalar.cpp new file mode 100644 index 0000000000..30744ed56a --- /dev/null +++ b/scanscalar.cpp @@ -0,0 +1,343 @@ +#include "scanscalar.h" +#include "scanner.h" +#include "exp.h" +#include "exceptions.h" +#include "token.h" + +namespace YAML +{ + ////////////////////////////////////////////////////////// + // WhitespaceInfo + + WhitespaceInfo::WhitespaceInfo(): leadingBlanks(false), fold(true), chomp(0), increment(0) + { + } + + void WhitespaceInfo::SetChompers(char ch) + { + if(ch == '+') + chomp = 1; + else if(ch == '-') + chomp = -1; + else if(Exp::Digit.Matches(ch)) { + increment = ch - '0'; + if(increment == 0) + throw ZeroIndentationInBlockScalar(); + } + } + + void WhitespaceInfo::AddBlank(char ch) + { + if(!leadingBlanks) + whitespace += ch; + } + + void WhitespaceInfo::AddBreak(const std::string& line) + { + // where to store this character? + if(!leadingBlanks) { + leadingBlanks = true; + whitespace = ""; + leadingBreaks += line; + } else + trailingBreaks += line; + } + + std::string WhitespaceInfo::Join(bool lastLine) + { + std::string ret; + + if(leadingBlanks) { + // fold line break? 
+ if(fold && Exp::Break.Matches(leadingBreaks) && trailingBreaks.empty() && !lastLine) + ret = " "; + else if(!lastLine || chomp != -1) + ret = leadingBreaks; + + if(!lastLine || chomp == 1) + ret += trailingBreaks; + + leadingBlanks = false; + leadingBreaks = ""; + trailingBreaks = ""; + } else if(!whitespace.empty()) { + ret = whitespace; + whitespace = ""; + } + + return ret; + } + + // PlainScalarToken + // . We scan these in passes of two steps each: First, grab all non-whitespace + // characters we can, and then grab all whitespace characters we can. + // . This has the benefit of letting us handle leading whitespace (which is chomped) + // and in-line whitespace (which is kept) separately. + template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) + { + // insert a potential simple key + if(m_simpleKeyAllowed) + InsertSimpleKey(); + m_simpleKeyAllowed = false; + + // now eat and store the scalar + std::string scalar; + WhitespaceInfo info; + + while(INPUT) { + // doc start/end tokens + if(IsDocumentStart() || IsDocumentEnd()) + break; + + // comment + if(Exp::Comment.Matches(INPUT)) + break; + + // first eat non-blanks + while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { + // illegal colon in flow context + if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) + throw IllegalScalar(); + + // characters that might end the scalar + if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) + break; + if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) + break; + + // finally, read the character! + scalar += GetChar(); + } + + // did we hit a non-blank character that ended us? + if(!Exp::BlankOrBreak.Matches(INPUT)) + break; + + // now eat blanks + while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { + if(Exp::Blank.Matches(INPUT)) { + // can't use tabs as indentation! only spaces! 
+ if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top()) + throw IllegalTabInScalar(); + + info.AddBlank(GetChar()); + } else { + // we know it's a line break; see how many characters to read + int n = Exp::Break.Match(INPUT); + std::string line = GetChar(n); + info.AddBreak(line); + + // and we can't continue a simple key to the next line + ValidateSimpleKey(); + } + } + + // break if we're below the indentation level + if(m_flowLevel == 0 && m_column <= m_indents.top()) + break; + + // finally join whitespace + scalar += info.Join(); + } + + // now modify our token + pToken->value = scalar; + if(info.leadingBlanks) + m_simpleKeyAllowed = true; + + return pToken; + } + + // QuotedScalarToken + template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken) + { + // insert a potential simple key + if(m_simpleKeyAllowed) + InsertSimpleKey(); + m_simpleKeyAllowed = false; + + // eat single or double quote + char quote = GetChar(); + pToken->single = (quote == '\''); + + // now eat and store the scalar + std::string scalar; + WhitespaceInfo info; + + while(INPUT) { + if(IsDocumentStart() || IsDocumentEnd()) + throw DocIndicatorInQuote(); + + if(INPUT.peek() == EOF) + throw EOFInQuote(); + + // first eat non-blanks + while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { + // escaped single quote? + if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) { + int n = Exp::EscSingleQuote.Match(INPUT); + scalar += GetChar(n); + continue; + } + + // is the quote ending? + if(INPUT.peek() == quote) + break; + + // escaped newline? + if(Exp::EscBreak.Matches(INPUT)) + break; + + // other escape sequence + if(INPUT.peek() == '\\') { + int length = 0; + scalar += Exp::Escape(INPUT, length); + m_column += length; + continue; + } + + // and finally, just add the damn character + scalar += GetChar(); + } + + // is the quote ending? 
+ if(INPUT.peek() == quote) { + // eat and go + GetChar(); + break; + } + + // now we eat blanks + while(Exp::BlankOrBreak.Matches(INPUT)) { + if(Exp::Blank.Matches(INPUT)) { + info.AddBlank(GetChar()); + } else { + // we know it's a line break; see how many characters to read + int n = Exp::Break.Match(INPUT); + std::string line = GetChar(n); + info.AddBreak(line); + + // and we can't continue a simple key to the next line + ValidateSimpleKey(); + } + } + + // and finally join the whitespace + scalar += info.Join(); + } + + pToken->value = scalar; + return pToken; + } + + // BlockScalarToken + template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken) + { + // simple keys always ok after block scalars (since we're gonna start a new line anyways) + m_simpleKeyAllowed = true; + + WhitespaceInfo info; + + // eat block indicator ('|' or '>') + char indicator = GetChar(); + info.fold = (indicator == Keys::FoldedScalar); + + // eat chomping/indentation indicators + int n = Exp::Chomp.Match(INPUT); + for(int i=0;i= 0) + indent += m_indents.top(); + + // finally, grab that scalar + std::string scalar; + while(INPUT) { + // initialize indentation + GetBlockIndentation(indent, info.trailingBreaks); + + // are we done with this guy (i.e. at a lower indentation?) + if(m_column != indent) + break; + + bool trailingBlank = Exp::Blank.Matches(INPUT); + scalar += info.Join(); + + bool leadingBlank = Exp::Blank.Matches(INPUT); + + // now eat and save the line + while(INPUT.peek() != EOF && !Exp::Break.Matches(INPUT)) + scalar += GetChar(); + + // we know it's a line break; see how many characters to read + int n = Exp::Break.Match(INPUT); + std::string line = GetChar(n); + info.AddBreak(line); + } + + // one last whitespace join (with chompers this time) + scalar += info.Join(true); + + // finally set the scalar + pToken->value = scalar; + + return pToken; + } + + // GetBlockIndentation + // . Helper to scanning a block scalar. + // . 
Eats leading *indentation* zeros (i.e., those that come before 'indent'), + // and updates 'indent' (if it hasn't been set yet). + void Scanner::GetBlockIndentation(int& indent, std::string& breaks) + { + int maxIndent = 0; + + while(1) { + // eat as many indentation spaces as we can + while((indent == 0 || m_column < indent) && INPUT.peek() == ' ') + Eat(1); + + if(m_column > maxIndent) + maxIndent = m_column; + + // do we need more indentation, but we've got a tab? + if((indent == 0 || m_column < indent) && INPUT.peek() == '\t') + throw IllegalTabInScalar(); // TODO: are literal scalar lines allowed to have tabs here? + + // is this a non-empty line? + if(!Exp::Break.Matches(INPUT)) + break; + + // otherwise, eat the line break and move on + int n = Exp::Break.Match(INPUT); + breaks += GetChar(n); + } + + // finally, set the indentation + if(indent == 0) { + indent = maxIndent; + if(indent < m_indents.top() + 1) + indent = m_indents.top() + 1; + if(indent < 1) + indent = 1; + } + } +} diff --git a/scanscalar.h b/scanscalar.h new file mode 100644 index 0000000000..d63831b231 --- /dev/null +++ b/scanscalar.h @@ -0,0 +1,20 @@ +#pragma once + +#include <string> + +namespace YAML +{ + struct WhitespaceInfo { + WhitespaceInfo(); + + void SetChompers(char ch); + void AddBlank(char ch); + void AddBreak(const std::string& line); + std::string Join(bool lastline = false); + + bool leadingBlanks; + bool fold; + std::string whitespace, leadingBreaks, trailingBreaks; + int chomp, increment; + }; +} diff --git a/scantoken.cpp b/scantoken.cpp index c81feb8de2..5f9bcc074d 100644 --- a/scantoken.cpp +++ b/scantoken.cpp @@ -220,339 +220,4 @@ namespace YAML pToken->value = tag; return pToken; } - - // PlainScalarToken - // . We scan these in passes of two steps each: First, grab all non-whitespace - // characters we can, and then grab all whitespace characters we can. - // . 
This has the benefit of letting us handle leading whitespace (which is chomped) - // and in-line whitespace (which is kept) separately. - template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) - { - // insert a potential simple key - if(m_simpleKeyAllowed) - InsertSimpleKey(); - m_simpleKeyAllowed = false; - - // now eat and store the scalar - std::string scalar; - WhitespaceInfo info; - - while(INPUT) { - // doc start/end tokens - if(IsDocumentStart() || IsDocumentEnd()) - break; - - // comment - if(Exp::Comment.Matches(INPUT)) - break; - - // first eat non-blanks - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { - // illegal colon in flow context - if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) - throw IllegalScalar(); - - // characters that might end the scalar - if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) - break; - if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) - break; - - // finally, read the character! - scalar += GetChar(); - } - - // did we hit a non-blank character that ended us? - if(!Exp::BlankOrBreak.Matches(INPUT)) - break; - - // now eat blanks - while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { - if(Exp::Blank.Matches(INPUT)) { - // can't use tabs as indentation! only spaces! 
- if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top()) - throw IllegalTabInScalar(); - - info.AddBlank(GetChar()); - } else { - // we know it's a line break; see how many characters to read - int n = Exp::Break.Match(INPUT); - std::string line = GetChar(n); - info.AddBreak(line); - - // and we can't continue a simple key to the next line - ValidateSimpleKey(); - } - } - - // break if we're below the indentation level - if(m_flowLevel == 0 && m_column <= m_indents.top()) - break; - - // finally join whitespace - scalar += info.Join(); - } - - // now modify our token - pToken->value = scalar; - if(info.leadingBlanks) - m_simpleKeyAllowed = true; - - return pToken; - } - - // QuotedScalarToken - template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken) - { - // insert a potential simple key - if(m_simpleKeyAllowed) - InsertSimpleKey(); - m_simpleKeyAllowed = false; - - // eat single or double quote - char quote = GetChar(); - bool single = (quote == '\''); - - // now eat and store the scalar - std::string scalar; - WhitespaceInfo info; - - while(INPUT) { - if(IsDocumentStart() || IsDocumentEnd()) - throw DocIndicatorInQuote(); - - if(INPUT.peek() == EOF) - throw EOFInQuote(); - - // first eat non-blanks - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { - // escaped single quote? - if(single && Exp::EscSingleQuote.Matches(INPUT)) { - int n = Exp::EscSingleQuote.Match(INPUT); - scalar += GetChar(n); - continue; - } - - // is the quote ending? - if(INPUT.peek() == (single ? '\'' : '\"')) - break; - - // escaped newline? - if(Exp::EscBreak.Matches(INPUT)) - break; - - // other escape sequence - if(INPUT.peek() == '\\') { - int length = 0; - scalar += Exp::Escape(INPUT, length); - m_column += length; - continue; - } - - // and finally, just add the damn character - scalar += GetChar(); - } - - // is the quote ending? - if(INPUT.peek() == (single ? 
'\'' : '\"')) { - // eat and go - GetChar(); - break; - } - - // now we eat blanks - while(Exp::BlankOrBreak.Matches(INPUT)) { - if(Exp::Blank.Matches(INPUT)) { - info.AddBlank(GetChar()); - } else { - // we know it's a line break; see how many characters to read - int n = Exp::Break.Match(INPUT); - std::string line = GetChar(n); - info.AddBreak(line); - - // and we can't continue a simple key to the next line - ValidateSimpleKey(); - } - } - - // and finally join the whitespace - scalar += info.Join(); - } - - pToken->value = scalar; - return pToken; - } - - // BlockScalarToken - template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken) - { - // simple keys always ok after block scalars (since we're gonna start a new line anyways) - m_simpleKeyAllowed = true; - - WhitespaceInfo info; - - // eat block indicator ('|' or '>') - char indicator = GetChar(); - info.fold = (indicator == Keys::FoldedScalar); - - // eat chomping/indentation indicators - int n = Exp::Chomp.Match(INPUT); - for(int i=0;i= 0) - indent += m_indents.top(); - - // finally, grab that scalar - std::string scalar; - while(INPUT) { - // initialize indentation - GetBlockIndentation(indent, info.trailingBreaks); - - // are we done with this guy (i.e. at a lower indentation?) - if(m_column != indent) - break; - - bool trailingBlank = Exp::Blank.Matches(INPUT); - scalar += info.Join(); - - bool leadingBlank = Exp::Blank.Matches(INPUT); - - // now eat and save the line - while(INPUT.peek() != EOF && !Exp::Break.Matches(INPUT)) - scalar += GetChar(); - - // we know it's a line break; see how many characters to read - int n = Exp::Break.Match(INPUT); - std::string line = GetChar(n); - info.AddBreak(line); - } - - // one last whitespace join (with chompers this time) - scalar += info.Join(true); - - // finally set the scalar - pToken->value = scalar; - - return pToken; - } - - // GetBlockIndentation - // . Helper to scanning a block scalar. - // . 
Eats leading *indentation* zeros (i.e., those that come before 'indent'), - // and updates 'indent' (if it hasn't been set yet). - void Scanner::GetBlockIndentation(int& indent, std::string& breaks) - { - int maxIndent = 0; - - while(1) { - // eat as many indentation spaces as we can - while((indent == 0 || m_column < indent) && INPUT.peek() == ' ') - Eat(1); - - if(m_column > maxIndent) - maxIndent = m_column; - - // do we need more indentation, but we've got a tab? - if((indent == 0 || m_column < indent) && INPUT.peek() == '\t') - throw IllegalTabInScalar(); // TODO: are literal scalar lines allowed to have tabs here? - - // is this a non-empty line? - if(!Exp::Break.Matches(INPUT)) - break; - - // otherwise, eat the line break and move on - int n = Exp::Break.Match(INPUT); - breaks += GetChar(n); - } - - // finally, set the indentation - if(indent == 0) { - indent = maxIndent; - if(indent < m_indents.top() + 1) - indent = m_indents.top() + 1; - if(indent < 1) - indent = 1; - } - } - - ////////////////////////////////////////////////////////// - // WhitespaceInfo stuff - - Scanner::WhitespaceInfo::WhitespaceInfo(): leadingBlanks(false), fold(true), chomp(0), increment(0) - { - } - - void Scanner::WhitespaceInfo::SetChompers(char ch) - { - if(ch == '+') - chomp = 1; - else if(ch == '-') - chomp = -1; - else if(Exp::Digit.Matches(ch)) { - increment = ch - '0'; - if(increment == 0) - throw ZeroIndentationInBlockScalar(); - } - } - - void Scanner::WhitespaceInfo::AddBlank(char ch) - { - if(!leadingBlanks) - whitespace += ch; - } - - void Scanner::WhitespaceInfo::AddBreak(const std::string& line) - { - // where to store this character? - if(!leadingBlanks) { - leadingBlanks = true; - whitespace = ""; - leadingBreaks += line; - } else - trailingBreaks += line; - } - - std::string Scanner::WhitespaceInfo::Join(bool lastLine) - { - std::string ret; - - if(leadingBlanks) { - // fold line break? 
- if(fold && Exp::Break.Matches(leadingBreaks) && trailingBreaks.empty() && !lastLine) - ret = " "; - else if(!lastLine || chomp != -1) - ret = leadingBreaks; - - if(!lastLine || chomp == 1) - ret += trailingBreaks; - - leadingBlanks = false; - leadingBreaks = ""; - trailingBreaks = ""; - } else if(!whitespace.empty()) { - ret = whitespace; - whitespace = ""; - } - - return ret; - } } diff --git a/test.yaml b/test.yaml index 9dfb8de769..98f6da43e6 100644 --- a/test.yaml +++ b/test.yaml @@ -3,10 +3,10 @@ people: name: Jesse age: 23 - &dab - name: Daniel + name: 'Daniel' age: 25 - &ncb - name: Naftali + name: "Naftali" age: 21 students: - *jsb diff --git a/token.h b/token.h index 93e83a6e07..81218170c7 100644 --- a/token.h +++ b/token.h @@ -44,6 +44,10 @@ namespace YAML }; struct PlainScalarToken: public ScalarToken {}; - struct QuotedScalarToken: public ScalarToken {}; + struct QuotedScalarToken: public ScalarToken { + bool single; + virtual void Write(std::ostream& out) const { out << (single ? '\'' : '\"') << value << (single ? '\'' : '\"'); } + }; + struct BlockScalarToken: public ScalarToken {}; } diff --git a/yaml-reader.vcproj b/yaml-reader.vcproj index b0c6da3b82..d68609b35a 100644 --- a/yaml-reader.vcproj +++ b/yaml-reader.vcproj @@ -201,6 +201,10 @@ RelativePath=".\scanner.cpp" > + + @@ -259,6 +263,10 @@ RelativePath=".\scanner.h" > + +