#include "scanscalar.h"
#include "scanner.h"
#include "exp.h"
#include "exceptions.h"
#include "token.h"

namespace YAML
{
//////////////////////////////////////////////////////////
// WhitespaceInfo
// . Accumulates the blanks and line breaks seen between two runs of scalar
//   content, so they can be turned into a single joining string by Join().

// Starts with no pending break, folding enabled, and neither a chomping
// indicator nor an explicit indentation increment recorded.
WhitespaceInfo::WhitespaceInfo(): leadingBlanks(false), fold(true), chomp(0), increment(0)
{
}

// SetChompers
// . Records one block-scalar header indicator:
//   '+' sets chomp to 1, '-' sets chomp to -1, and a digit sets 'increment'.
// . Throws ZeroIndentationInBlockScalar if the digit is '0'.
// . Any other character is ignored.
void WhitespaceInfo::SetChompers(char ch)
{
	if(ch == '+') {
		chomp = 1;
		return;
	}

	if(ch == '-') {
		chomp = -1;
		return;
	}

	if(!Exp::Digit.Matches(ch))
		return;

	// the increment is stored before it is validated
	increment = ch - '0';
	if(increment == 0)
		throw ZeroIndentationInBlockScalar();
}

// AddBlank
// . Buffers a blank character, unless a line break has already been recorded
//   since the last Join() (in which case the blank is dropped).
void WhitespaceInfo::AddBlank(char ch)
{
	if(leadingBlanks)
		return;

	whitespace += ch;
}

// AddBreak
// . Records a line break. The first break since the last Join() discards the
//   buffered blanks and goes to 'leadingBreaks'; every later one is appended
//   to 'trailingBreaks'.
void WhitespaceInfo::AddBreak(const std::string& line)
{
	// a break was already seen: this one is a trailing break
	if(leadingBlanks) {
		trailingBreaks += line;
		return;
	}

	// first break: blanks before it are thrown away
	leadingBlanks = true;
	whitespace = "";
	leadingBreaks += line;
}

// Join
// . Produces the text that should be inserted for the whitespace collected so
//   far, and resets the collected state that was consumed.
// . With a pending line break: either a single space (when folding applies) or
//   the recorded breaks, subject to 'chomp' when 'lastLine' is true.
// . Without a pending line break: the buffered blanks, if any.
std::string WhitespaceInfo::Join(bool lastLine)
{
	std::string joined;

	// no line break pending: hand back whatever blanks were buffered
	if(!leadingBlanks) {
		if(!whitespace.empty()) {
			joined = whitespace;
			whitespace = "";
		}
		return joined;
	}

	// fold the line break into a space?
	const bool foldBreak = fold && Exp::Break.Matches(leadingBreaks) && trailingBreaks.empty() && !lastLine;
	if(foldBreak)
		joined = " ";
	else if(!lastLine || chomp != -1)
		joined = leadingBreaks;

	// trailing breaks survive unless this is the last line and we are not keeping them
	if(!lastLine || chomp == 1)
		joined += trailingBreaks;

	leadingBlanks = false;
	leadingBreaks = "";
	trailingBreaks = "";
	return joined;
}

// PlainScalarToken
// . We scan these in passes of two steps each: First, grab all non-whitespace
//   characters we can, and then grab all whitespace characters we can.
// . This has the benefit of letting us handle leading whitespace (which is chomped)
//   and in-line whitespace (which is kept) separately.
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
	// Scans a plain (unquoted) scalar by configuring a ScanScalarInfo and
	// delegating to ScanScalar(); the result is stored in pToken->value and
	// pToken is returned.
	//
	// The commented-out code below is the earlier hand-written scanning loop,
	// kept for reference; it is not compiled.

	//// now eat and store the scalar
	//std::string scalar;
	//WhitespaceInfo info;

	//while(INPUT) {
	//	// doc start/end tokens
	//	if(IsDocumentStart() || IsDocumentEnd())
	//		break;

	//	// comment
	//	if(Exp::Comment.Matches(INPUT))
	//		break;

	//	// first eat non-blanks
	//	while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
	//		// illegal colon in flow context
	//		if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
	//			throw IllegalScalar();

	//		// characters that might end the scalar
	//		if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
	//			break;
	//		if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
	//			break;

	//		// finally, read the character!
	//		scalar += GetChar();
	//	}

	//	// did we hit a non-blank character that ended us?
	//	if(!Exp::BlankOrBreak.Matches(INPUT))
	//		break;

	//	// now eat blanks
	//	while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
	//		if(Exp::Blank.Matches(INPUT)) {
	//			// can't use tabs as indentation! only spaces!
	//			if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
	//				throw IllegalTabInScalar();

	//			info.AddBlank(GetChar());
	//		} else {
	//			// we know it's a line break; see how many characters to read
	//			int n = Exp::Break.Match(INPUT);
	//			std::string line = GetChar(n);
	//			info.AddBreak(line);

	//			// and we can't continue a simple key to the next line
	//			ValidateSimpleKey();
	//		}
	//	}

	//	// break if we're below the indentation level
	//	if(m_flowLevel == 0 && m_column <= m_indents.top())
	//		break;

	//	// finally join whitespace
	//	scalar += info.Join();
	//}

	ScanScalarInfo info;
	// the scalar ends at the context-specific end expression, or at a space followed by a comment
	info.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
	// the terminator is left in the stream
	info.eatEnd = false;
	// no indentation requirement inside a flow context; otherwise one past the current indent
	info.indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
	info.fold = true;
	info.eatLeadingWhitespace = true;
	info.trimTrailingSpaces = true;
	info.chomp = CLIP;

	// insert a potential simple key
	if(m_simpleKeyAllowed)
		InsertSimpleKey();

	pToken->value = ScanScalar(INPUT, info);

	// NOTE(review): the condition below is hard-coded to true (the original
	// test on info.leadingBlanks is commented out), so m_simpleKeyAllowed
	// always ends up true here.
	m_simpleKeyAllowed = false;
	if(true/*info.leadingBlanks*/)
		m_simpleKeyAllowed = true;

	return pToken;
}

// QuotedScalarToken
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
{
	// Scans a single- or double-quoted scalar: consumes the opening quote,
	// configures a ScanScalarInfo for that quote style and delegates to
	// ScanScalar(); the result is stored in pToken->value and pToken is
	// returned. Simple keys are disallowed afterwards.
	//
	// The commented-out code below is the earlier hand-written scanning loop,
	// kept for reference; it is not compiled.

	//// now eat and store the scalar
	//std::string scalar;
	//WhitespaceInfo info;

	//while(INPUT) {
	//	if(IsDocumentStart() || IsDocumentEnd())
	//		throw DocIndicatorInQuote();

	//	if(INPUT.peek() == EOF)
	//		throw EOFInQuote();

	//	// first eat non-blanks
	//	while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
	//		// escaped single quote?
	//		if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) {
	//			int n = Exp::EscSingleQuote.Match(INPUT);
	//			scalar += GetChar(n);
	//			continue;
	//		}

	//		// is the quote ending?
	//		if(INPUT.peek() == quote)
	//			break;

	//		// escaped newline?
	//		if(Exp::EscBreak.Matches(INPUT))
	//			break;

	//		// other escape sequence
	//		if(INPUT.peek() == '\\') {
	//			int length = 0;
	//			scalar += Exp::Escape(INPUT, length);
	//			m_column += length;
	//			continue;
	//		}

	//		// and finally, just add the damn character
	//		scalar += GetChar();
	//	}

	//	// is the quote ending?
	//	if(INPUT.peek() == quote) {
	//		// eat and go
	//		GetChar();
	//		break;
	//	}

	//	// now we eat blanks
	//	while(Exp::BlankOrBreak.Matches(INPUT)) {
	//		if(Exp::Blank.Matches(INPUT)) {
	//			info.AddBlank(GetChar());
	//		} else {
	//			// we know it's a line break; see how many characters to read
	//			int n = Exp::Break.Match(INPUT);
	//			std::string line = GetChar(n);
	//			info.AddBreak(line);

	//			// and we can't continue a simple key to the next line
	//			ValidateSimpleKey();
	//		}
	//	}

	//	// and finally join the whitespace
	//	scalar += info.Join();
	//}

	// eat single or double quote
	char quote = INPUT.GetChar();
	pToken->single = (quote == '\'');

	ScanScalarInfo info;
	// the scalar ends at the matching quote; in single-quoted style an
	// escaped single quote (Exp::EscSingleQuote) does not end it
	info.end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
	// the closing quote is consumed
	info.eatEnd = true;
	// escape character: the quote itself for single-quoted, backslash for double-quoted
	info.escape = (pToken->single ? '\'' : '\\');
	info.indent = 0;
	info.fold = true;
	info.eatLeadingWhitespace = true;
	info.trimTrailingSpaces = false;
	info.chomp = CLIP;

	// insert a potential simple key
	if(m_simpleKeyAllowed)
		InsertSimpleKey();

	pToken->value = ScanScalar(INPUT, info);
	m_simpleKeyAllowed = false;

	return pToken;
}

// BlockScalarToken
template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken)
{
	// Scans a block scalar: reads the block indicator and its header,
	// determines the block's indentation with GetBlockIndentation(), then
	// delegates to ScanScalar(); the result is stored in pToken->value and
	// pToken is returned. Simple keys are allowed afterwards.
	WhitespaceInfo info;

	// eat block indicator ('|' or '>')
	char indicator = INPUT.GetChar();
	info.fold = (indicator == Keys::FoldedScalar);

	// eat chomping/indentation indicators
	int n = Exp::Chomp.Match(INPUT);
	// NOTE(review): the next statement is not well-formed C++ as it stands.
	// Text between "i" and "= 0)" appears to be missing from this copy of the
	// file: the rest of the for-loop, and the declaration of 'indent', which
	// is used below but never declared in the visible code. Restore it from
	// version control before building; do not reconstruct it by guesswork.
	for(int i=0;i= 0) indent += m_indents.top();

	GetBlockIndentation(INPUT, indent, info.trailingBreaks, m_indents.top());

	// carry the header settings over to the generic scalar scanner
	ScanScalarInfo sinfo;
	sinfo.indent = indent;
	sinfo.fold = info.fold;
	sinfo.eatLeadingWhitespace = false;
	sinfo.trimTrailingSpaces = false;
	sinfo.chomp = (CHOMP) info.chomp;

	pToken->value = ScanScalar(INPUT, sinfo);

	// simple keys always ok after block scalars (since we're gonna start a new line anyways)
	m_simpleKeyAllowed = true;
	return pToken;
}

// GetBlockIndentation
// . Helper for scanning a block scalar.
// .
Eats leading *indentation* zeros (i.e., those that come before 'indent'), // and updates 'indent' (if it hasn't been set yet). void GetBlockIndentation(Stream& INPUT, int& indent, std::string& breaks, int topIndent) { int maxIndent = 0; while(1) { // eat as many indentation spaces as we can while((indent == 0 || INPUT.column < indent) && INPUT.peek() == ' ') INPUT.Eat(1); if(INPUT.column > maxIndent) maxIndent = INPUT.column; // do we need more indentation, but we've got a tab? if((indent == 0 || INPUT.column < indent) && INPUT.peek() == '\t') throw IllegalTabInScalar(); // TODO: are literal scalar lines allowed to have tabs here? // is this a non-empty line? if(!Exp::Break.Matches(INPUT)) break; // otherwise, eat the line break and move on int n = Exp::Break.Match(INPUT); breaks += INPUT.GetChar(n); } // finally, set the indentation if(indent == 0) { indent = maxIndent; if(indent < topIndent + 1) indent = topIndent + 1; if(indent < 1) indent = 1; } } // ScanScalar std::string ScanScalar(Stream& INPUT, ScanScalarInfo info) { bool emptyLine = false, moreIndented = false; std::string scalar; while(INPUT) { // ******************************** // Phase #1: scan until line ending while(!info.end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) { if(INPUT.peek() == EOF) break; // escaped newline? (only if we're escaping on slash) if(info.escape == '\\' && Exp::EscBreak.Matches(INPUT)) { int n = Exp::EscBreak.Match(INPUT); INPUT.Eat(n); continue; } // escape this? if(INPUT.peek() == info.escape) { scalar += Exp::Escape(INPUT); continue; } // otherwise, just add the damn character scalar += INPUT.GetChar(); } // eof? if we're looking to eat something, then we throw if(INPUT.peek() == EOF) { if(info.eatEnd) throw EOFInQuote(); break; } // are we done via character match? 
int n = info.end.Match(INPUT); if(n >= 0) { if(info.eatEnd) INPUT.Eat(n); break; } // ******************************** // Phase #2: eat line ending n = Exp::Break.Match(INPUT); INPUT.Eat(n); // ******************************** // Phase #3: scan initial spaces // first the required indentation while(INPUT.peek() == ' ' && INPUT.column < info.indent) INPUT.Eat(1); // and then the rest of the whitespace if(info.eatLeadingWhitespace) { while(Exp::Blank.Matches(INPUT)) INPUT.Eat(1); } // was this an empty line? bool nextEmptyLine = Exp::Break.Matches(INPUT); bool nextMoreIndented = (INPUT.peek() == ' '); if(info.fold && !emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented) scalar += " "; else scalar += "\n"; emptyLine = nextEmptyLine; moreIndented = nextMoreIndented; // are we done via indentation? if(!emptyLine && INPUT.column < info.indent) break; } // post-processing if(info.trimTrailingSpaces) { unsigned pos = scalar.find_last_not_of(' '); if(pos < scalar.size()) scalar.erase(pos + 1); } if(info.chomp <= 0) { unsigned pos = scalar.find_last_not_of('\n'); if(info.chomp == 0 && pos + 1 < scalar.size()) scalar.erase(pos + 2); else if(info.chomp == -1 && pos < scalar.size()) scalar.erase(pos + 1); } return scalar; } }