diff --git a/exp.cpp b/exp.cpp index 07e4d2b4d7..96ba91c0e3 100644 --- a/exp.cpp +++ b/exp.cpp @@ -62,7 +62,7 @@ namespace YAML } // Escape - // . Escapes the sequence starting 'in' (it must begin with a '\') + // . Escapes the sequence starting 'in' (it must begin with a '\' or single quote) // and returns the result. // . Fills 'length' with how many characters we ate. // . Throws if it's an unknown escape character. @@ -72,10 +72,16 @@ namespace YAML length = 2; // eat slash - in.get(); + char escape = in.get(); // switch on escape character char ch = in.get(); + + // first do single quote, since it's easier + if(escape == '\'' && ch == '\'') + return "\'"; + + // now do the slash (we're not gonna check if it's a slash - you better pass one!) switch(ch) { case '0': return "\0"; case 'a': return "\x07"; diff --git a/regex.cpp b/regex.cpp index 342c0bd90e..040ccb9282 100644 --- a/regex.cpp +++ b/regex.cpp @@ -53,6 +53,7 @@ namespace YAML case REGEX_MATCH: m_pOp = new MatchOperator; break; case REGEX_RANGE: m_pOp = new RangeOperator; break; case REGEX_OR: m_pOp = new OrOperator; break; + case REGEX_AND: m_pOp = new AndOperator; break; case REGEX_NOT: m_pOp = new NotOperator; break; case REGEX_SEQ: m_pOp = new SeqOperator; break; } @@ -80,19 +81,13 @@ namespace YAML // . Returns the number of characters matched. // . Returns -1 if no characters were matched (the reason for // not returning zero is that we may have an empty regex - // which SHOULD be considered successfully matching nothing, - // but that of course matches zero characters). + // which is ALWAYS successful at matching zero characters). int RegEx::Match(const std::string& str) const { if(!m_pOp) - return -1; + return 0; return m_pOp->Match(str, *this); - - //case REGEX_EMPTY: - // if(str.empty()) - // return 0; - // return -1; } // Match @@ -131,6 +126,14 @@ namespace YAML return ret; } + RegEx operator && (const RegEx& ex1, const RegEx& ex2) + { + RegEx ret(REGEX_AND); + ret.m_params.push_back(ex1); + ret.m_params.push_back(ex2); + return ret; + } + RegEx operator + (const RegEx& ex1, const RegEx& ex2) { RegEx ret(REGEX_SEQ); @@ -194,6 +197,36 @@ namespace YAML return -1; } + // AndOperator + // Note: 'AND' is a little funny, since we may be required to match things + // of different lengths. If we find a match, we return the length of + // the FIRST entry on the list. + int RegEx::AndOperator::Match(const std::string& str, const RegEx& regex) const + { + int first = -1; + for(unsigned i=0;i #include #include +#include "regex.h" namespace YAML { @@ -44,6 +45,7 @@ namespace YAML bool IsPlainScalar(); void GetBlockIndentation(int& indent, std::string& breaks); + std::string ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp); struct SimpleKey { SimpleKey(int pos_, int line_, int column_, int flowLevel_); diff --git a/scanscalar.cpp b/scanscalar.cpp index 30744ed56a..1a323ce0f3 100644 --- a/scanscalar.cpp +++ b/scanscalar.cpp @@ -75,74 +75,77 @@ namespace YAML // and in-line whitespace (which is kept) separately. template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) { + //// now eat and store the scalar + //std::string scalar; + //WhitespaceInfo info; + + //while(INPUT) { + // // doc start/end tokens + // if(IsDocumentStart() || IsDocumentEnd()) + // break; + + // // comment + // if(Exp::Comment.Matches(INPUT)) + // break; + + // // first eat non-blanks + // while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { + // // illegal colon in flow context + // if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) + // throw IllegalScalar(); + + // // characters that might end the scalar + // if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) + // break; + // if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) + // break; + + // // finally, read the character! + // scalar += GetChar(); + // } + + // // did we hit a non-blank character that ended us? + // if(!Exp::BlankOrBreak.Matches(INPUT)) + // break; + + // // now eat blanks + // while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { + // if(Exp::Blank.Matches(INPUT)) { + // // can't use tabs as indentation! only spaces! + // if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top()) + // throw IllegalTabInScalar(); + + // info.AddBlank(GetChar()); + // } else { + // // we know it's a line break; see how many characters to read + // int n = Exp::Break.Match(INPUT); + // std::string line = GetChar(n); + // info.AddBreak(line); + + // // and we can't continue a simple key to the next line + // ValidateSimpleKey(); + // } + // } + + // // break if we're below the indentation level + // if(m_flowLevel == 0 && m_column <= m_indents.top()) + // break; + + // // finally join whitespace + // scalar += info.Join(); + //} + + RegEx end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment); + int indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1); + // insert a potential simple key if(m_simpleKeyAllowed) InsertSimpleKey(); + + pToken->value = ScanScalar(end, false, indent, 0, true, true, true, 0); + m_simpleKeyAllowed = false; - - // now eat and store the scalar - std::string scalar; - WhitespaceInfo info; - - while(INPUT) { - // doc start/end tokens - if(IsDocumentStart() || IsDocumentEnd()) - break; - - // comment - if(Exp::Comment.Matches(INPUT)) - break; - - // first eat non-blanks - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { - // illegal colon in flow context - if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) - throw IllegalScalar(); - - // characters that might end the scalar - if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) - break; - if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) - break; - - // finally, read the character! - scalar += GetChar(); - } - - // did we hit a non-blank character that ended us? - if(!Exp::BlankOrBreak.Matches(INPUT)) - break; - - // now eat blanks - while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { - if(Exp::Blank.Matches(INPUT)) { - // can't use tabs as indentation! only spaces! - if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top()) - throw IllegalTabInScalar(); - - info.AddBlank(GetChar()); - } else { - // we know it's a line break; see how many characters to read - int n = Exp::Break.Match(INPUT); - std::string line = GetChar(n); - info.AddBreak(line); - - // and we can't continue a simple key to the next line - ValidateSimpleKey(); - } - } - - // break if we're below the indentation level - if(m_flowLevel == 0 && m_column <= m_indents.top()) - break; - - // finally join whitespace - scalar += info.Join(); - } - - // now modify our token - pToken->value = scalar; - if(info.leadingBlanks) + if(true/*info.leadingBlanks*/) m_simpleKeyAllowed = true; return pToken; @@ -151,91 +154,92 @@ namespace YAML // QuotedScalarToken template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken) { - // insert a potential simple key - if(m_simpleKeyAllowed) - InsertSimpleKey(); - m_simpleKeyAllowed = false; + //// now eat and store the scalar + //std::string scalar; + //WhitespaceInfo info; + + //while(INPUT) { + // if(IsDocumentStart() || IsDocumentEnd()) + // throw DocIndicatorInQuote(); + + // if(INPUT.peek() == EOF) + // throw EOFInQuote(); + + // // first eat non-blanks + // while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { + // // escaped single quote? + // if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) { + // int n = Exp::EscSingleQuote.Match(INPUT); + // scalar += GetChar(n); + // continue; + // } + + // // is the quote ending? + // if(INPUT.peek() == quote) + // break; + + // // escaped newline? + // if(Exp::EscBreak.Matches(INPUT)) + // break; + + // // other escape sequence + // if(INPUT.peek() == '\\') { + // int length = 0; + // scalar += Exp::Escape(INPUT, length); + // m_column += length; + // continue; + // } + + // // and finally, just add the damn character + // scalar += GetChar(); + // } + + // // is the quote ending? + // if(INPUT.peek() == quote) { + // // eat and go + // GetChar(); + // break; + // } + + // // now we eat blanks + // while(Exp::BlankOrBreak.Matches(INPUT)) { + // if(Exp::Blank.Matches(INPUT)) { + // info.AddBlank(GetChar()); + // } else { + // // we know it's a line break; see how many characters to read + // int n = Exp::Break.Match(INPUT); + // std::string line = GetChar(n); + // info.AddBreak(line); + + // // and we can't continue a simple key to the next line + // ValidateSimpleKey(); + // } + // } + + // // and finally join the whitespace + // scalar += info.Join(); + //} // eat single or double quote char quote = GetChar(); pToken->single = (quote == '\''); - // now eat and store the scalar - std::string scalar; - WhitespaceInfo info; + RegEx end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote)); + char escape = (pToken->single ? '\'' : '\\'); - while(INPUT) { - if(IsDocumentStart() || IsDocumentEnd()) - throw DocIndicatorInQuote(); + // insert a potential simple key + if(m_simpleKeyAllowed) + InsertSimpleKey(); - if(INPUT.peek() == EOF) - throw EOFInQuote(); + pToken->value = ScanScalar(end, true, 0, escape, true, true, false, 0); + m_simpleKeyAllowed = false; - // first eat non-blanks - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { - // escaped single quote? - if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) { - int n = Exp::EscSingleQuote.Match(INPUT); - scalar += GetChar(n); - continue; - } - - // is the quote ending? - if(INPUT.peek() == quote) - break; - - // escaped newline? - if(Exp::EscBreak.Matches(INPUT)) - break; - - // other escape sequence - if(INPUT.peek() == '\\') { - int length = 0; - scalar += Exp::Escape(INPUT, length); - m_column += length; - continue; - } - - // and finally, just add the damn character - scalar += GetChar(); - } - - // is the quote ending? - if(INPUT.peek() == quote) { - // eat and go - GetChar(); - break; - } - - // now we eat blanks - while(Exp::BlankOrBreak.Matches(INPUT)) { - if(Exp::Blank.Matches(INPUT)) { - info.AddBlank(GetChar()); - } else { - // we know it's a line break; see how many characters to read - int n = Exp::Break.Match(INPUT); - std::string line = GetChar(n); - info.AddBreak(line); - - // and we can't continue a simple key to the next line - ValidateSimpleKey(); - } - } - - // and finally join the whitespace - scalar += info.Join(); - } - - pToken->value = scalar; return pToken; } // BlockScalarToken template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken) { - // simple keys always ok after block scalars (since we're gonna start a new line anyways) - m_simpleKeyAllowed = true; - WhitespaceInfo info; // eat block indicator ('|' or '>') @@ -268,37 +272,13 @@ namespace YAML if(info.increment && m_indents.top() >= 0) indent += m_indents.top(); - // finally, grab that scalar - std::string scalar; - while(INPUT) { - // initialize indentation - GetBlockIndentation(indent, info.trailingBreaks); + GetBlockIndentation(indent, info.trailingBreaks); - // are we done with this guy (i.e. at a lower indentation?) - if(m_column != indent) - break; - - bool trailingBlank = Exp::Blank.Matches(INPUT); - scalar += info.Join(); - - bool leadingBlank = Exp::Blank.Matches(INPUT); - - // now eat and save the line - while(INPUT.peek() != EOF && !Exp::Break.Matches(INPUT)) - scalar += GetChar(); - - // we know it's a line break; see how many characters to read - int n = Exp::Break.Match(INPUT); - std::string line = GetChar(n); - info.AddBreak(line); - } - - // one last whitespace join (with chompers this time) - scalar += info.Join(true); - - // finally set the scalar - pToken->value = scalar; + bool eatLeadingWhitespace = false; + pToken->value = ScanScalar(RegEx(), false, indent, 0, info.fold, eatLeadingWhitespace, false, info.chomp); + // simple keys always ok after block scalars (since we're gonna start a new line anyways) + m_simpleKeyAllowed = true; return pToken; } @@ -340,4 +320,104 @@ namespace YAML indent = 1; } } + + // ScanScalar + std::string Scanner::ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp) + { + bool emptyLine = false, moreIndented = false; + std::string scalar; + + while(INPUT) { + // ******************************** + // Phase #1: scan until line ending + while(!end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) { + if(INPUT.peek() == EOF) + break; + + // escaped newline? (only if we're escaping on slash) + if(escape == '\\' && Exp::EscBreak.Matches(INPUT)) { + int n = Exp::EscBreak.Match(INPUT); + Eat(n); + continue; + } + + // escape this? + if(INPUT.peek() == escape) { + int length = 0; + scalar += Exp::Escape(INPUT, length); + m_column += length; + continue; + } + + // otherwise, just add the damn character + scalar += GetChar(); + } + + // eof? if we're looking to eat something, then we throw + if(INPUT.peek() == EOF) { + if(eatEnd) + throw EOFInQuote(); + break; + } + + // are we done via character match? + int n = end.Match(INPUT); + if(n >= 0) { + if(eatEnd) + Eat(n); + break; + } + + // ******************************** + // Phase #2: eat line ending + n = Exp::Break.Match(INPUT); + Eat(n); + + // ******************************** + // Phase #3: scan initial spaces + + // first the required indentation + while(INPUT.peek() == ' ' && m_column < indent) + Eat(1); + + // and then the rest of the whitespace + if(eatLeadingWhitespace) { + while(Exp::Blank.Matches(INPUT)) + Eat(1); + } + + // was this an empty line? + bool nextEmptyLine = Exp::Break.Matches(INPUT); + bool nextMoreIndented = (INPUT.peek() == ' '); + + if(fold && !emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented) + scalar += " "; + else + scalar += "\n"; + + emptyLine = nextEmptyLine; + moreIndented = nextMoreIndented; + + // are we done via indentation? + if(!emptyLine && m_column < indent) + break; + } + + // post-processing + if(trimTrailingSpaces) { + unsigned pos = scalar.find_last_not_of(' '); + if(pos < scalar.size()) + scalar.erase(pos + 1); + } + + if(chomp <= 0) { + unsigned pos = scalar.find_last_not_of('\n'); + if(chomp == 0 && pos + 1 < scalar.size()) + scalar.erase(pos + 2); + else if(chomp == -1 && pos < scalar.size()) + scalar.erase(pos + 1); + } + + return scalar; + } } diff --git a/test.yaml b/test.yaml index 98f6da43e6..ee5d76e926 100644 --- a/test.yaml +++ b/test.yaml @@ -1,13 +1,4 @@ -people: - - &jsb - name: Jesse - age: 23 - - &dab - name: 'Daniel' - age: 25 - - &ncb - name: "Naftali" - age: 21 -students: - - *jsb - - *ncb \ No newline at end of file +--- +- "quoted scalar that contains +--- + the document start!" \ No newline at end of file