diff --git a/exp.h b/exp.h new file mode 100644 index 0000000000..bc2c59b91b --- /dev/null +++ b/exp.h @@ -0,0 +1,53 @@ +#pragma once + +#include "regex.h" + +namespace YAML +{ + //////////////////////////////////////////////////////////////////////////////// + // Here we store a bunch of expressions for matching different parts of the file. + + namespace Exp + { + // misc + const RegEx Blank = RegEx(' ') || RegEx('\t'); + const RegEx Break = RegEx('\n'); + const RegEx BlankOrBreak = Blank || Break; + + // actual tags + + const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx()); + const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx()); + const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF)); + const RegEx Key = RegEx('?'), + KeyInFlow = RegEx('?') + BlankOrBreak; + const RegEx Value = RegEx(':'), + ValueInFlow = RegEx(':') + BlankOrBreak; + const RegEx Comment = RegEx('#'); + + // Plain scalar rules: + // . Cannot start with a blank. + // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ ` + // . In the block context - ? : must not be followed with a space. + // . In the flow context ? : are illegal and - must not be followed with a space. 
+ const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)), + PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank)); + const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak; + const RegEx EndScalar = RegEx(':') + BlankOrBreak, + EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}"); + } + + namespace Keys + { + const char FlowSeqStart = '['; + const char FlowSeqEnd = ']'; + const char FlowMapStart = '{'; + const char FlowMapEnd = '}'; + const char FlowEntry = ','; + const char Alias = '*'; + const char Anchor = '&'; + const char Tag = '!'; + const char LiteralScalar = '|'; + const char FoldedScalar = '>'; + } +} diff --git a/scanner.cpp b/scanner.cpp index f656d32878..7f49977f95 100644 --- a/scanner.cpp +++ b/scanner.cpp @@ -1,6 +1,7 @@ #include "scanner.h" #include "token.h" #include "exceptions.h" +#include "exp.h" #include namespace YAML @@ -124,9 +125,6 @@ namespace YAML return Exp::PlainScalar.Matches(INPUT); } - /////////////////////////////////////////////////////////////////////// - // Specialization for scanning specific tokens - // ScanAndEnqueue // . Scans the token, then pushes it in the queue. // . 
Note: we also use a set of "limbo tokens", i.e., tokens @@ -141,290 +139,6 @@ namespace YAML m_limboTokens.erase(pToken); } - // StreamStartToken - template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken) - { - m_startedStream = true; - m_simpleKeyAllowed = true; - m_indents.push(-1); - - return pToken; - } - - // StreamEndToken - template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken) - { - // force newline - if(m_column > 0) - m_column = 0; - - PopIndentTo(-1); - // TODO: "reset simple keys" - - m_simpleKeyAllowed = false; - m_endedStream = true; - - return pToken; - } - - // DocumentStartToken - template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken) - { - PopIndentTo(m_column); - // TODO: "reset simple keys" - - m_simpleKeyAllowed = false; - - // eat - Eat(3); - return pToken; - } - - // DocumentEndToken - template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken) - { - PopIndentTo(-1); - // TODO: "reset simple keys" - - m_simpleKeyAllowed = false; - - // eat - Eat(3); - return pToken; - } - - // FlowSeqStartToken - template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken) - { - // TODO: "save simple key" - // TODO: increase flow level - - m_simpleKeyAllowed = true; - - // eat - Eat(1); - return pToken; - } - - // FlowMapStartToken - template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken) - { - // TODO: "save simple key" - // TODO: increase flow level - - m_simpleKeyAllowed = true; - - // eat - Eat(1); - return pToken; - } - - // FlowSeqEndToken - template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken) - { - // TODO: "remove simple key" - // TODO: decrease flow level - - m_simpleKeyAllowed = false; - - // eat - Eat(1); - return pToken; - } - - // FlowMapEndToken - template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken) - { - // TODO: "remove simple key" - // TODO: decrease flow level - - m_simpleKeyAllowed = false; - 
- // eat - Eat(1); - return pToken; - } - - // FlowEntryToken - template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken) - { - // TODO: "remove simple key" - - m_simpleKeyAllowed = true; - - // eat - Eat(1); - return pToken; - } - - // BlockEntryToken - template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken) - { - // we better be in the block context! - if(m_flowLevel == 0) { - // can we put it here? - if(!m_simpleKeyAllowed) - throw IllegalBlockEntry(); - - PushIndentTo(m_column, true); // , -1 - } else { - // TODO: throw? - } - - // TODO: "remove simple key" - - m_simpleKeyAllowed = true; - - // eat - Eat(1); - return pToken; - } - - // KeyToken - template <> KeyToken *Scanner::ScanToken(KeyToken *pToken) - { - // are we in block context? - if(m_flowLevel == 0) { - if(!m_simpleKeyAllowed) - throw IllegalMapKey(); - - PushIndentTo(m_column, false); - } - - // TODO: "remove simple key" - - // can only put a simple key here if we're in block context - if(m_flowLevel == 0) - m_simpleKeyAllowed = true; - else - m_simpleKeyAllowed = false; - - // eat - Eat(1); - return pToken; - } - - // ValueToken - template <> ValueToken *Scanner::ScanToken(ValueToken *pToken) - { - // TODO: Is it a simple key? - if(false) { - } else { - // If not, ... - // are we in block context? 
- if(m_flowLevel == 0) { - if(!m_simpleKeyAllowed) - throw IllegalMapValue(); - - PushIndentTo(m_column, false); - } - } - - // can only put a simple key here if we're in block context - if(m_flowLevel == 0) - m_simpleKeyAllowed = true; - else - m_simpleKeyAllowed = false; - - // eat - Eat(1); - return pToken; - } - - // PlainScalarToken - template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) - { - // TODO: "save simple key" - - m_simpleKeyAllowed = false; - - // now eat and store the scalar - std::string scalar, whitespace, leadingBreaks, trailingBreaks; - bool leadingBlanks = false; - - while(INPUT) { - // doc start/end tokens - if(IsDocumentStart() || IsDocumentEnd()) - break; - - // comment - if(Exp::Comment.Matches(INPUT)) - break; - - // first eat non-blanks - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { - // illegal colon in flow context - if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) - throw IllegalScalar(); - - // characters that might end the scalar - if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) - break; - if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) - break; - - if(leadingBlanks) { - if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') { - // fold line break? - if(trailingBreaks.empty()) - scalar += ' '; - else { - scalar += trailingBreaks; - trailingBreaks = ""; - } - } else { - scalar += leadingBreaks + trailingBreaks; - leadingBreaks = ""; - trailingBreaks = ""; - } - } else if(!whitespace.empty()) { - scalar += whitespace; - whitespace = ""; - } - - // finally, read the character! - scalar += GetChar(); - } - - // did we hit a non-blank character that ended us? 
- if(!Exp::BlankOrBreak.Matches(INPUT)) - break; - - // now eat blanks - while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { - if(Exp::Blank.Matches(INPUT)) { - if(leadingBlanks && m_column <= m_indents.top()) - throw IllegalTabInScalar(); - - // maybe store this character - if(!leadingBlanks) - whitespace += GetChar(); - else - Eat(1); - } else { - // where to store this character? - if(!leadingBlanks) { - leadingBlanks = true; - whitespace = ""; - leadingBreaks += GetChar(); - } else - trailingBreaks += GetChar(); - } - } - - // and finally break if we're below the indentation level - if(m_flowLevel == 0 && m_column <= m_indents.top()) - break; - } - - // now modify our token - pToken->SetValue(scalar); - if(leadingBlanks) - m_simpleKeyAllowed = true; - - return pToken; - } - /////////////////////////////////////////////////////////////////////// // The main scanning function diff --git a/scanner.h b/scanner.h index 4a91e508bb..29d6b42840 100644 --- a/scanner.h +++ b/scanner.h @@ -5,56 +5,11 @@ #include #include #include -#include "regex.h" namespace YAML { class Token; - namespace Exp - { - // misc - const RegEx Blank = RegEx(' ') || RegEx('\t'); - const RegEx Break = RegEx('\n'); - const RegEx BlankOrBreak = Blank || Break; - - // actual tags - - const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx()); - const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx()); - const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF)); - const RegEx Key = RegEx('?'), - KeyInFlow = RegEx('?') + BlankOrBreak; - const RegEx Value = RegEx(':'), - ValueInFlow = RegEx(':') + BlankOrBreak; - const RegEx Comment = RegEx('#'); - - // Plain scalar rules: - // . Cannot start with a blank. - // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ ` - // . In the block context - ? : must be not be followed with a space. - // . In the flow context ? : are illegal and - must not be followed with a space. 
- const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)), - PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank)); - const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak; - const RegEx EndScalar = RegEx(':') + BlankOrBreak, - EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}"); - } - - namespace Keys - { - const char FlowSeqStart = '['; - const char FlowSeqEnd = ']'; - const char FlowMapStart = '{'; - const char FlowMapEnd = '}'; - const char FlowEntry = ','; - const char Alias = '*'; - const char Anchor = '&'; - const char Tag = '!'; - const char LiteralScalar = '|'; - const char FoldedScalar = '>'; - } - class Scanner { public: diff --git a/scantoken.cpp b/scantoken.cpp new file mode 100644 index 0000000000..0fd1179174 --- /dev/null +++ b/scantoken.cpp @@ -0,0 +1,294 @@ +#include "scanner.h" +#include "token.h" +#include "exceptions.h" +#include "exp.h" + +namespace YAML +{ + /////////////////////////////////////////////////////////////////////// + // Specialization for scanning specific tokens + + // StreamStartToken + template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken) + { + m_startedStream = true; + m_simpleKeyAllowed = true; + m_indents.push(-1); + + return pToken; + } + + // StreamEndToken + template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken) + { + // force newline + if(m_column > 0) + m_column = 0; + + PopIndentTo(-1); + // TODO: "reset simple keys" + + m_simpleKeyAllowed = false; + m_endedStream = true; + + return pToken; + } + + // DocumentStartToken + template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken) + { + PopIndentTo(m_column); + // TODO: "reset simple keys" + + m_simpleKeyAllowed = false; + + // eat + Eat(3); + return pToken; + } + + // DocumentEndToken + template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken) + { + PopIndentTo(-1); + // 
TODO: "reset simple keys" + + m_simpleKeyAllowed = false; + + // eat + Eat(3); + return pToken; + } + + // FlowSeqStartToken + template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken) + { + // TODO: "save simple key" + // TODO: increase flow level + + m_simpleKeyAllowed = true; + + // eat + Eat(1); + return pToken; + } + + // FlowMapStartToken + template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken) + { + // TODO: "save simple key" + // TODO: increase flow level + + m_simpleKeyAllowed = true; + + // eat + Eat(1); + return pToken; + } + + // FlowSeqEndToken + template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken) + { + // TODO: "remove simple key" + // TODO: decrease flow level + + m_simpleKeyAllowed = false; + + // eat + Eat(1); + return pToken; + } + + // FlowMapEndToken + template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken) + { + // TODO: "remove simple key" + // TODO: decrease flow level + + m_simpleKeyAllowed = false; + + // eat + Eat(1); + return pToken; + } + + // FlowEntryToken + template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken) + { + // TODO: "remove simple key" + + m_simpleKeyAllowed = true; + + // eat + Eat(1); + return pToken; + } + + // BlockEntryToken + template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken) + { + // we better be in the block context! + if(m_flowLevel == 0) { + // can we put it here? + if(!m_simpleKeyAllowed) + throw IllegalBlockEntry(); + + PushIndentTo(m_column, true); // , -1 + } else { + // TODO: throw? + } + + // TODO: "remove simple key" + + m_simpleKeyAllowed = true; + + // eat + Eat(1); + return pToken; + } + + // KeyToken + template <> KeyToken *Scanner::ScanToken(KeyToken *pToken) + { + // are we in block context? 
+ if(m_flowLevel == 0) { + if(!m_simpleKeyAllowed) + throw IllegalMapKey(); + + PushIndentTo(m_column, false); + } + + // TODO: "remove simple key" + + // can only put a simple key here if we're in block context + if(m_flowLevel == 0) + m_simpleKeyAllowed = true; + else + m_simpleKeyAllowed = false; + + // eat + Eat(1); + return pToken; + } + + // ValueToken + template <> ValueToken *Scanner::ScanToken(ValueToken *pToken) + { + // TODO: Is it a simple key? + if(false) { + } else { + // If not, ... + // are we in block context? + if(m_flowLevel == 0) { + if(!m_simpleKeyAllowed) + throw IllegalMapValue(); + + PushIndentTo(m_column, false); + } + } + + // can only put a simple key here if we're in block context + if(m_flowLevel == 0) + m_simpleKeyAllowed = true; + else + m_simpleKeyAllowed = false; + + // eat + Eat(1); + return pToken; + } + + // PlainScalarToken + template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) + { + // TODO: "save simple key" + + m_simpleKeyAllowed = false; + + // now eat and store the scalar + std::string scalar, whitespace, leadingBreaks, trailingBreaks; + bool leadingBlanks = false; + + while(INPUT) { + // doc start/end tokens + if(IsDocumentStart() || IsDocumentEnd()) + break; + + // comment + if(Exp::Comment.Matches(INPUT)) + break; + + // first eat non-blanks + while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) { + // illegal colon in flow context + if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT)) + throw IllegalScalar(); + + // characters that might end the scalar + if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT)) + break; + if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) + break; + + if(leadingBlanks) { + if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') { + // fold line break? 
+ if(trailingBreaks.empty()) + scalar += ' '; + else { + scalar += trailingBreaks; + trailingBreaks = ""; + } + } else { + scalar += leadingBreaks + trailingBreaks; + leadingBreaks = ""; + trailingBreaks = ""; + } + } else if(!whitespace.empty()) { + scalar += whitespace; + whitespace = ""; + } + + // finally, read the character! + scalar += GetChar(); + } + + // did we hit a non-blank character that ended us? + if(!Exp::BlankOrBreak.Matches(INPUT)) + break; + + // now eat blanks + while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { + if(Exp::Blank.Matches(INPUT)) { + if(leadingBlanks && m_column <= m_indents.top()) + throw IllegalTabInScalar(); + + // maybe store this character + if(!leadingBlanks) + whitespace += GetChar(); + else + Eat(1); + } else { + // where to store this character? + if(!leadingBlanks) { + leadingBlanks = true; + whitespace = ""; + leadingBreaks += GetChar(); + } else + trailingBreaks += GetChar(); + } + } + + // and finally break if we're below the indentation level + if(m_flowLevel == 0 && m_column <= m_indents.top()) + break; + } + + // now modify our token + pToken->SetValue(scalar); + if(leadingBlanks) + m_simpleKeyAllowed = true; + + return pToken; + } +} diff --git a/yaml-reader.vcproj b/yaml-reader.vcproj index e0c32de3f9..39f42a4bc2 100644 --- a/yaml-reader.vcproj +++ b/yaml-reader.vcproj @@ -197,6 +197,10 @@ RelativePath=".\scanner.cpp" > + + @@ -219,6 +223,10 @@ RelativePath=".\exceptions.h" > + +