From 43ea59a4ed70bfe4f31f3fc2443e96bcb55b0203 Mon Sep 17 00:00:00 2001 From: Jesse Beder Date: Sat, 28 Jun 2008 20:08:21 +0000 Subject: [PATCH] Added folded and literal scalars. --- exceptions.h | 2 + exp.h | 3 ++ scanner.cpp | 11 ++-- scanner.h | 7 ++- scantoken.cpp | 145 ++++++++++++++++++++++++++++++++++++++++++++++---- test.yaml | 7 ++- token.h | 1 + 7 files changed, 153 insertions(+), 23 deletions(-) diff --git a/exceptions.h b/exceptions.h index cd39abcac7..1e0867b8b1 100644 --- a/exceptions.h +++ b/exceptions.h @@ -15,6 +15,8 @@ namespace YAML class DocIndicatorInQuote: public Exception {}; class EOFInQuote: public Exception {}; class RequiredSimpleKeyNotFound: public Exception {}; + class ZeroIndentationInBlockScalar: public Exception {}; + class UnexpectedCharacterInBlockScalar: public Exception {}; class UnknownEscapeSequence: public Exception { public: diff --git a/exp.h b/exp.h index 3ad948fe16..3dcbdb2fba 100644 --- a/exp.h +++ b/exp.h @@ -43,6 +43,9 @@ namespace YAML const RegEx EscSingleQuote = RegEx("\'\'"); const RegEx EscBreak = RegEx('\\') + Break; + const RegEx ChompIndicator = RegEx("+-", REGEX_OR); + const RegEx Chomp = (ChompIndicator + Digit) || (Digit + ChompIndicator) || ChompIndicator || Digit; + // and some functions std::string Escape(std::istream& in, int& length); } diff --git a/scanner.cpp b/scanner.cpp index d785706573..1237a60133 100644 --- a/scanner.cpp +++ b/scanner.cpp @@ -31,8 +31,8 @@ namespace YAML // . 
Extracts a character from the stream and updates our position char Scanner::GetChar() { - m_column++; char ch = INPUT.get(); + m_column++; if(ch == '\n') { m_column = 0; m_line++; @@ -201,12 +201,9 @@ namespace YAML // TODO: alias/anchor/tag - // TODO: special scalars - if(INPUT.peek() == Keys::LiteralScalar && m_flowLevel == 0) - return; - - if(INPUT.peek() == Keys::FoldedScalar && m_flowLevel == 0) - return; + // special scalars + if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar)) + return ScanAndEnqueue(new BlockScalarToken); if(INPUT.peek() == '\'' || INPUT.peek() == '\"') return ScanAndEnqueue(new QuotedScalarToken); diff --git a/scanner.h b/scanner.h index 8d0033baad..b83b8a2bc5 100644 --- a/scanner.h +++ b/scanner.h @@ -45,15 +45,20 @@ namespace YAML bool IsValue(); bool IsPlainScalar(); + void GetBlockIndentation(int& indent, std::string& breaks); + struct WhitespaceInfo { WhitespaceInfo(); + void SetChompers(char ch); void AddBlank(char ch); void AddBreak(const std::string& line); - std::string Join(); + std::string Join(bool lastline = false); bool leadingBlanks; + bool fold; std::string whitespace, leadingBreaks, trailingBreaks; + int chomp, increment; }; struct SimpleKey { diff --git a/scantoken.cpp b/scantoken.cpp index 0cc741ee97..6cd08d2789 100644 --- a/scantoken.cpp +++ b/scantoken.cpp @@ -358,13 +358,137 @@ namespace YAML return pToken; } + // BlockScalarToken + template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken) + { + // simple keys always ok after block scalars (since we're gonna start a new line anyways) + m_simpleKeyAllowed = true; + + WhitespaceInfo info; + + // eat block indicator ('|' or '>') + char indicator = GetChar(); + info.fold = (indicator == Keys::FoldedScalar); + + // eat chomping/indentation indicators + int n = Exp::Chomp.Match(INPUT); + for(int i=0;i= 0) + indent += m_indents.top(); + + // finally, grab that scalar + std::string scalar; + while(INPUT) { + // 
initialize indentation + GetBlockIndentation(indent, info.trailingBreaks); + + // are we done with this guy (i.e. at a lower indentation?) + if(m_column != indent) + break; + + bool trailingBlank = Exp::Blank.Matches(INPUT); + scalar += info.Join(); + + bool leadingBlank = Exp::Blank.Matches(INPUT); + + // now eat and save the line + while(INPUT.peek() != EOF && !Exp::Break.Matches(INPUT)) + scalar += GetChar(); + + // we know it's a line break; see how many characters to read + int n = Exp::Break.Match(INPUT); + std::string line = GetChar(n); + info.AddBreak(line); + } + + // one last whitespace join (with chompers this time) + scalar += info.Join(true); + + // finally set the scalar + pToken->value = scalar; + + return pToken; + } + + // GetBlockIndentation + // . Helper for scanning a block scalar. + // . Eats leading *indentation* spaces (i.e., those that come before 'indent'), + // and updates 'indent' (if it hasn't been set yet). + void Scanner::GetBlockIndentation(int& indent, std::string& breaks) + { + int maxIndent = 0; + + while(1) { + // eat as many indentation spaces as we can + while((indent == 0 || m_column < indent) && INPUT.peek() == ' ') + Eat(1); + + if(m_column > maxIndent) + maxIndent = m_column; + + // do we need more indentation, but we've got a tab? + if((indent == 0 || m_column < indent) && INPUT.peek() == '\t') + throw IllegalTabInScalar(); // TODO: are literal scalar lines allowed to have tabs here? + + // is this a non-empty line?
+ if(!Exp::Break.Matches(INPUT)) + break; + + // otherwise, eat the line break and move on + int n = Exp::Break.Match(INPUT); + breaks += GetChar(n); + } + + // finally, set the indentation + if(indent == 0) { + indent = maxIndent; + if(indent < m_indents.top() + 1) + indent = m_indents.top() + 1; + if(indent < 1) + indent = 1; + } + } + ////////////////////////////////////////////////////////// // WhitespaceInfo stuff - Scanner::WhitespaceInfo::WhitespaceInfo(): leadingBlanks(false) + Scanner::WhitespaceInfo::WhitespaceInfo(): leadingBlanks(false), fold(true), chomp(0), increment(0) { } + void Scanner::WhitespaceInfo::SetChompers(char ch) + { + if(ch == '+') + chomp = 1; + else if(ch == '-') + chomp = -1; + else if(Exp::Digit.Matches(ch)) { + increment = ch - '0'; + if(increment == 0) + throw ZeroIndentationInBlockScalar(); + } + } + void Scanner::WhitespaceInfo::AddBlank(char ch) { if(!leadingBlanks) @@ -382,20 +506,19 @@ namespace YAML trailingBreaks += line; } - std::string Scanner::WhitespaceInfo::Join() + std::string Scanner::WhitespaceInfo::Join(bool lastLine) { std::string ret; if(leadingBlanks) { - if(Exp::Break.Matches(leadingBreaks)) { - // fold line break? - if(trailingBreaks.empty()) - ret = " "; - else - ret = trailingBreaks; - } else { - ret = leadingBreaks + trailingBreaks; - } + // fold line break? + if(fold && Exp::Break.Matches(leadingBreaks) && trailingBreaks.empty() && !lastLine) + ret = " "; + else if(!lastLine || chomp != -1) + ret = leadingBreaks; + + if(!lastLine || chomp == 1) + ret += trailingBreaks; leadingBlanks = false; leadingBreaks = ""; diff --git a/test.yaml b/test.yaml index 3cb65c2a51..82533b51b8 100644 --- a/test.yaml +++ b/test.yaml @@ -1,4 +1,3 @@ -{ - a simple key: a value, - ? a complex key: another value, -} \ No newline at end of file +- sun: yellow +- ? 
earth: blue + : moon: white \ No newline at end of file diff --git a/token.h b/token.h index fec77d2203..1ab6571cb9 100644 --- a/token.h +++ b/token.h @@ -39,4 +39,5 @@ namespace YAML struct PlainScalarToken: public ScalarToken {}; struct QuotedScalarToken: public ScalarToken {}; + struct BlockScalarToken: public ScalarToken {}; }