From 852e5b63e583789d8f4f320b27dc6c834636dbaa Mon Sep 17 00:00:00 2001
From: Jesse Beder
Date: Mon, 30 Jun 2008 21:47:21 +0000
Subject: [PATCH] Instead of deriving different tokens from a base Token class,
 we now use an enumerated TOKEN_TYPE to distinguish types. This is so we don't
 have to cast all the time when parsing the resulting token stream. Also,
 removed start/end stream tokens.

---
 parser.cpp    |   5 +-
 scanner.cpp   |  86 +++++++++++-----------
 scanner.h     |  23 +++++-
 scantoken.cpp | 194 ++++++++++++++++++++++----------------------
 simplekey.cpp |   2 +-
 token.h       | 105 +++++++++++++--------------
 6 files changed, 201 insertions(+), 214 deletions(-)

diff --git a/parser.cpp b/parser.cpp
index 3ec881cedf..75c014c739 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -8,9 +8,6 @@ namespace YAML
 {
 	Parser::Parser(std::istream& in): m_scanner(in)
 	{
-		// eat the stream start token
-		// TODO: check?
-		Token *pToken = m_scanner.GetNextToken();
 	}

 	Parser::~Parser()
@@ -25,7 +22,7 @@ namespace YAML
 			if(!pToken)
 				break;

-			std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
+			std::cout << *pToken << std::endl;
 			delete pToken;
 		}
 		getchar();
diff --git a/scanner.cpp b/scanner.cpp
index c3e82ec1e4..448a8da435 100644
--- a/scanner.cpp
+++ b/scanner.cpp
@@ -16,10 +16,6 @@ namespace YAML
 			delete m_tokens.front();
 			m_tokens.pop();
 		}
-
-		// delete limbo tokens (they're here for RAII)
-		for(std::set <Token*>::const_iterator it=m_limboTokens.begin();it!=m_limboTokens.end();++it)
-			delete *it;
 	}

 	// GetNextToken
@@ -78,7 +74,7 @@ namespace YAML
 			return;

 		if(!m_startedStream)
-			return ScanAndEnqueue(new StreamStartToken);
+			return StartStream();

 		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
 		ScanToNextToken();
@@ -95,62 +91,56 @@ namespace YAML

 		// end of stream
 		if(INPUT.peek() == EOF)
-			return ScanAndEnqueue(new StreamEndToken);
+			return EndStream();

 		if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)
-			return ScanAndEnqueue(new DirectiveToken);
+			return ScanDirective();

 		// document token
 		if(INPUT.column == 0 && Exp::DocStart.Matches(INPUT))
-			return ScanAndEnqueue(new DocumentStartToken);
+			return ScanDocStart();

 		if(INPUT.column == 0 && Exp::DocEnd.Matches(INPUT))
-			return ScanAndEnqueue(new DocumentEndToken);
+			return ScanDocEnd();

 		// flow start/end/entry
-		if(INPUT.peek() == Keys::FlowSeqStart)
-			return ScanAndEnqueue(new FlowSeqStartToken);
-
-		if(INPUT.peek() == Keys::FlowSeqEnd)
-			return ScanAndEnqueue(new FlowSeqEndToken);
-
-		if(INPUT.peek() == Keys::FlowMapStart)
-			return ScanAndEnqueue(new FlowMapStartToken);
-
-		if(INPUT.peek() == Keys::FlowMapEnd)
-			return ScanAndEnqueue(new FlowMapEndToken);
+		if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
+			return ScanFlowStart();
+		if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
+			return ScanFlowEnd();
+
 		if(INPUT.peek() == Keys::FlowEntry)
-			return ScanAndEnqueue(new FlowEntryToken);
+			return ScanFlowEntry();

 		// block/map stuff
 		if(Exp::BlockEntry.Matches(INPUT))
-			return ScanAndEnqueue(new BlockEntryToken);
+			return ScanBlockEntry();

 		if((m_flowLevel == 0 ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
-			return ScanAndEnqueue(new KeyToken);
+			return ScanKey();

 		if((m_flowLevel == 0 ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
-			return ScanAndEnqueue(new ValueToken);
+			return ScanValue();

 		// alias/anchor
 		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
-			return ScanAndEnqueue(new AnchorToken);
+			return ScanAnchorOrAlias();

 		// tag
 		if(INPUT.peek() == Keys::Tag)
-			return ScanAndEnqueue(new TagToken);
+			return ScanTag();

 		// special scalars
 		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
-			return ScanAndEnqueue(new BlockScalarToken);
+			return ScanBlockScalar();

 		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
-			return ScanAndEnqueue(new QuotedScalarToken);
+			return ScanQuotedScalar();

 		// plain scalars
 		if((m_flowLevel == 0 ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
-			return ScanAndEnqueue(new PlainScalarToken);
+			return ScanPlainScalar();

 		// don't know what it is!
 		throw UnknownToken();
@@ -210,18 +200,28 @@ namespace YAML
 		return false;
 	}

-	// ScanAndEnqueue
-	// . Scans the token, then pushes it in the queue.
-	// . Note: we also use a set of "limbo tokens", i.e., tokens
-	//   that haven't yet been pushed. This way, if ScanToken()
-	//   throws an exception, we'll be keeping track of 'pToken'
-	//   somewhere, and it will be automatically cleaned up when
-	//   the Scanner destructs.
-	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
+	// StartStream
+	// . Set the initial conditions for starting a stream.
+	void Scanner::StartStream()
 	{
-		m_limboTokens.insert(pToken);
-		m_tokens.push(ScanToken(pToken));
-		m_limboTokens.erase(pToken);
+		m_startedStream = true;
+		m_simpleKeyAllowed = true;
+		m_indents.push(-1);
+	}
+
+	// EndStream
+	// . Close out the stream, finish up, etc.
+	void Scanner::EndStream()
+	{
+		// force newline
+		if(INPUT.column > 0)
+			INPUT.column = 0;
+
+		PopIndentTo(-1);
+		VerifyAllSimpleKeys();
+
+		m_simpleKeyAllowed = false;
+		m_endedStream = true;
 	}

 	// PushIndentTo
@@ -241,9 +241,9 @@ namespace YAML
 		// now push
 		m_indents.push(column);
 		if(sequence)
-			m_tokens.push(new BlockSeqStartToken);
+			m_tokens.push(new Token(TT_BLOCK_SEQ_START));
 		else
-			m_tokens.push(new BlockMapStartToken);
+			m_tokens.push(new Token(TT_BLOCK_MAP_START));

 		return m_tokens.front();
 	}
@@ -260,7 +260,7 @@ namespace YAML
 		// now pop away
 		while(!m_indents.empty() && m_indents.top() > column) {
 			m_indents.pop();
-			m_tokens.push(new BlockEndToken);
+			m_tokens.push(new Token(TT_BLOCK_END));
 		}
 	}
 }
diff --git a/scanner.h b/scanner.h
index 8f711cb16a..f686ba43a7 100644
--- a/scanner.h
+++ b/scanner.h
@@ -23,6 +23,8 @@ namespace YAML

 	private:
 		// scanning
+		void StartStream();
+		void EndStream();
 		void ScanNextToken();
 		void ScanToNextToken();
 		Token *PushIndentTo(int column, bool sequence);
@@ -46,8 +48,24 @@ namespace YAML
 			Token *pMapStart, *pKey;
 		};

-		template <typename T> void ScanAndEnqueue(T *pToken);
-		template <typename T> T *ScanToken(T *pToken);
+		// and the tokens
+		void ScanDirective();
+		void ScanDocStart();
+		void ScanDocEnd();
+		void ScanBlockSeqStart();
+		void ScanBlockMapSTart();
+		void ScanBlockEnd();
+		void ScanBlockEntry();
+		void ScanFlowStart();
+		void ScanFlowEnd();
+		void ScanFlowEntry();
+		void ScanKey();
+		void ScanValue();
+		void ScanAnchorOrAlias();
+		void ScanTag();
+		void ScanPlainScalar();
+		void ScanQuotedScalar();
+		void ScanBlockScalar();

 	private:
 		// the stream
@@ -55,7 +73,6 @@ namespace YAML

 		// the output (tokens)
 		std::queue <Token*> m_tokens;
-		std::set <Token*> m_limboTokens;

 		// state info
 		bool m_startedStream, m_endedStream;
diff --git a/scantoken.cpp b/scantoken.cpp
index f9ea4bff9b..6e22a8816a 100644
--- a/scantoken.cpp
+++ b/scantoken.cpp
@@ -9,36 +9,13 @@ namespace YAML
 	///////////////////////////////////////////////////////////////////////
 	// Specialization for scanning specific tokens

-	// StreamStartToken
-	template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
-	{
-		m_startedStream = true;
-		m_simpleKeyAllowed = true;
-		m_indents.push(-1);
-
-		return pToken;
-	}
-
-	// StreamEndToken
-	template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
-	{
-		// force newline
-		if(INPUT.column > 0)
-			INPUT.column = 0;
-
-		PopIndentTo(-1);
-		VerifyAllSimpleKeys();
-
-		m_simpleKeyAllowed = false;
-		m_endedStream = true;
-
-		return pToken;
-	}
-
-	// DirectiveToken
+	// Directive
 	// . Note: no semantic checking is done here (that's for the parser to do)
-	template <> DirectiveToken *Scanner::ScanToken(DirectiveToken *pToken)
+	void Scanner::ScanDirective()
 	{
+		std::string name;
+		std::vector <std::string> params;
+
 		// pop indents and simple keys
 		PopIndentTo(-1);
 		VerifyAllSimpleKeys();
@@ -50,7 +27,7 @@ namespace YAML

 		// read name
 		while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
-			pToken->name += INPUT.GetChar();
+			name += INPUT.GetChar();

 		// read parameters
 		while(1) {
@@ -67,14 +44,17 @@ namespace YAML
 			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
 				param += INPUT.GetChar();

-			pToken->params.push_back(param);
+			params.push_back(param);
 		}

-		return pToken;
+		Token *pToken = new Token(TT_DIRECTIVE);
+		pToken->value = name;
+		pToken->params = params;
+		m_tokens.push(pToken);
 	}

-	// DocumentStartToken
-	template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
+	// DocStart
+	void Scanner::ScanDocStart()
 	{
 		PopIndentTo(INPUT.column);
 		VerifyAllSimpleKeys();
@@ -82,11 +62,11 @@ namespace YAML

 		// eat
 		INPUT.Eat(3);
-		return pToken;
+		m_tokens.push(new Token(TT_DOC_START));
 	}

-	// DocumentEndToken
-	template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
+	// DocEnd
+	void Scanner::ScanDocEnd()
 	{
 		PopIndentTo(-1);
 		VerifyAllSimpleKeys();
@@ -94,37 +74,25 @@ namespace YAML

 		// eat
 		INPUT.Eat(3);
-		return pToken;
+		m_tokens.push(new Token(TT_DOC_END));
 	}

-	// FlowSeqStartToken
-	template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
+	// FlowStart
+	void Scanner::ScanFlowStart()
 	{
-		// flow sequences can be simple keys
+		// flows can be simple keys
 		InsertSimpleKey();
 		m_flowLevel++;
 		m_simpleKeyAllowed = true;

 		// eat
-		INPUT.Eat(1);
-		return pToken;
+		char ch = INPUT.GetChar();
+		TOKEN_TYPE type = (ch == Keys::FlowSeqStart ? TT_FLOW_SEQ_START : TT_FLOW_MAP_START);
+		m_tokens.push(new Token(type));
 	}

-	// FlowMapStartToken
-	template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
-	{
-		// flow maps can be simple keys
-		InsertSimpleKey();
-		m_flowLevel++;
-		m_simpleKeyAllowed = true;
-
-		// eat
-		INPUT.Eat(1);
-		return pToken;
-	}
-
-	// FlowSeqEndToken
-	template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
+	// FlowEnd
+	void Scanner::ScanFlowEnd()
 	{
 		if(m_flowLevel == 0)
 			throw IllegalFlowEnd();
@@ -133,36 +101,23 @@ namespace YAML
 		m_flowLevel--;
 		m_simpleKeyAllowed = false;

 		// eat
-		INPUT.Eat(1);
-		return pToken;
+		char ch = INPUT.GetChar();
+		TOKEN_TYPE type = (ch == Keys::FlowSeqEnd ? TT_FLOW_SEQ_END : TT_FLOW_MAP_END);
+		m_tokens.push(new Token(type));
 	}

-	// FlowMapEndToken
-	template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
-	{
-		if(m_flowLevel == 0)
-			throw IllegalFlowEnd();
-
-		m_flowLevel--;
-		m_simpleKeyAllowed = false;
-
-		// eat
-		INPUT.Eat(1);
-		return pToken;
-	}
-
-	// FlowEntryToken
-	template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
+	// FlowEntry
+	void Scanner::ScanFlowEntry()
 	{
 		m_simpleKeyAllowed = true;

 		// eat
 		INPUT.Eat(1);
-		return pToken;
+		m_tokens.push(new Token(TT_FLOW_ENTRY));
 	}

-	// BlockEntryToken
-	template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
+	// BlockEntry
+	void Scanner::ScanBlockEntry()
 	{
 		// we better be in the block context!
 		if(m_flowLevel > 0)
@@ -177,11 +132,11 @@ namespace YAML

 		// eat
 		INPUT.Eat(1);
-		return pToken;
+		m_tokens.push(new Token(TT_BLOCK_ENTRY));
 	}

-	// KeyToken
-	template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
+	// Key
+	void Scanner::ScanKey()
 	{
 		// handle keys diffently in the block context (and manage indents)
 		if(m_flowLevel == 0) {
@@ -199,11 +154,11 @@ namespace YAML

 		// eat
 		INPUT.Eat(1);
-		return pToken;
+		m_tokens.push(new Token(TT_KEY));
 	}

-	// ValueToken
-	template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
+	// Value
+	void Scanner::ScanValue()
 	{
 		// does this follow a simple key?
 		if(m_isLastKeyValid) {
@@ -227,12 +182,15 @@ namespace YAML

 		// eat
 		INPUT.Eat(1);
-		return pToken;
+		m_tokens.push(new Token(TT_VALUE));
 	}

-	// AnchorToken
-	template <> AnchorToken *Scanner::ScanToken(AnchorToken *pToken)
+	// AnchorOrAlias
+	void Scanner::ScanAnchorOrAlias()
 	{
+		bool alias;
+		std::string tag;
+
 		// insert a potential simple key
 		if(m_simpleKeyAllowed)
 			InsertSimpleKey();
@@ -240,10 +198,9 @@ namespace YAML

 		// eat the indicator
 		char indicator = INPUT.GetChar();
-		pToken->alias = (indicator == Keys::Alias);
+		alias = (indicator == Keys::Alias);

 		// now eat the content
-		std::string tag;
 		while(Exp::AlphaNumeric.Matches(INPUT))
 			tag += INPUT.GetChar();

@@ -256,13 +213,16 @@ namespace YAML
 			throw IllegalCharacterInAnchor();

 		// and we're done
+		Token *pToken = new Token(alias ? TT_ALIAS : TT_ANCHOR);
 		pToken->value = tag;
-		return pToken;
+		m_tokens.push(pToken);
 	}

-	// TagToken
-	template <> TagToken *Scanner::ScanToken(TagToken *pToken)
+	// Tag
+	void Scanner::ScanTag()
 	{
+		std::string handle, suffix;
+
 		// insert a potential simple key
 		if(m_simpleKeyAllowed)
 			InsertSimpleKey();
@@ -273,7 +233,7 @@ namespace YAML

 		// read the handle
 		while(INPUT.peek() != EOF && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
-			pToken->handle += INPUT.GetChar();
+			handle += INPUT.GetChar();

 		// is there a suffix?
 		if(INPUT.peek() == Keys::Tag) {
@@ -282,15 +242,20 @@ namespace YAML

 			// then read it
 			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
-				pToken->suffix += INPUT.GetChar();
+				suffix += INPUT.GetChar();
 		}

-		return pToken;
+		Token *pToken = new Token(TT_TAG);
+		pToken->value = handle;
+		pToken->params.push_back(suffix);
+		m_tokens.push(pToken);
 	}

-	// PlainScalarToken
-	template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
+	// PlainScalar
+	void Scanner::ScanPlainScalar()
 	{
+		std::string scalar;
+
 		// set up the scanning parameters
 		ScanScalarParams params;
 		params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
@@ -307,7 +272,7 @@ namespace YAML
 		if(m_simpleKeyAllowed)
 			InsertSimpleKey();

-		pToken->value = ScanScalar(INPUT, params);
+		scalar = ScanScalar(INPUT, params);

 		// can have a simple key only if we ended the scalar by starting a new line
 		m_simpleKeyAllowed = params.leadingSpaces;
@@ -317,21 +282,25 @@ namespace YAML
 		if(Exp::IllegalColonInScalar.Matches(INPUT))
 			throw IllegalScalar();

-		return pToken;
+		Token *pToken = new Token(TT_SCALAR);
+		pToken->value = scalar;
+		m_tokens.push(pToken);
 	}

-	// QuotedScalarToken
-	template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
+	// QuotedScalar
+	void Scanner::ScanQuotedScalar()
 	{
+		std::string scalar;
+
 		// eat single or double quote
 		char quote = INPUT.GetChar();
-		pToken->single = (quote == '\'');
+		bool single = (quote == '\'');

 		// setup the scanning parameters
 		ScanScalarParams params;
-		params.end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
+		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
 		params.eatEnd = true;
-		params.escape = (pToken->single ? '\'' : '\\');
+		params.escape = (single ? '\'' : '\\');
 		params.indent = 0;
 		params.fold = true;
 		params.eatLeadingWhitespace = true;
@@ -343,18 +312,22 @@ namespace YAML
 		if(m_simpleKeyAllowed)
 			InsertSimpleKey();

-		pToken->value = ScanScalar(INPUT, params);
+		scalar = ScanScalar(INPUT, params);
 		m_simpleKeyAllowed = false;

-		return pToken;
+		Token *pToken = new Token(TT_SCALAR);
+		pToken->value = scalar;
+		m_tokens.push(pToken);
 	}

 	// BlockScalarToken
 	// . These need a little extra processing beforehand.
 	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
 	//   and then we need to figure out what level of indentation we'll be using.
-	template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken)
+	void Scanner::ScanBlockScalar()
 	{
+		std::string scalar;
+
 		ScanScalarParams params;
 		params.indent = 1;
 		params.detectIndent = true;
@@ -401,10 +374,13 @@ namespace YAML
 		params.trimTrailingSpaces = false;
 		params.onTabInIndentation = THROW;

-		pToken->value = ScanScalar(INPUT, params);
+		scalar = ScanScalar(INPUT, params);

 		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
 		m_simpleKeyAllowed = true;
-		return pToken;
+
+		Token *pToken = new Token(TT_SCALAR);
+		pToken->value = scalar;
+		m_tokens.push(pToken);
 	}
 }
diff --git a/simplekey.cpp b/simplekey.cpp
index aa5d8b35ac..3f17265b53 100644
--- a/simplekey.cpp
+++ b/simplekey.cpp
@@ -44,7 +44,7 @@ namespace YAML
 		// key.required = true;	// TODO: is this correct?
 		// then add the (now unverified) key
-		key.pKey = new KeyToken;
+		key.pKey = new Token(TT_KEY);
 		key.pKey->status = TS_UNVERIFIED;
 		m_tokens.push(key.pKey);

diff --git a/token.h b/token.h
index 3a3bff51de..17bd20d5ca 100644
--- a/token.h
+++ b/token.h
@@ -7,64 +7,61 @@ namespace YAML
 {
 	enum TOKEN_STATUS { TS_VALID, TS_INVALID, TS_UNVERIFIED };
+	enum TOKEN_TYPE {
+		TT_DIRECTIVE,
+		TT_DOC_START,
+		TT_DOC_END,
+		TT_BLOCK_SEQ_START,
+		TT_BLOCK_MAP_START,
+		TT_BLOCK_END,
+		TT_BLOCK_ENTRY,
+		TT_FLOW_SEQ_START,
+		TT_FLOW_MAP_START,
+		TT_FLOW_SEQ_END,
+		TT_FLOW_MAP_END,
+		TT_FLOW_ENTRY,
+		TT_KEY,
+		TT_VALUE,
+		TT_ANCHOR,
+		TT_ALIAS,
+		TT_TAG,
+		TT_SCALAR,
+	};
+
+	const std::string TokenNames[] = {
+		"DIRECTIVE",
+		"DOC_START",
+		"DOC_END",
+		"BLOCK_SEQ_START",
+		"BLOCK_MAP_START",
+		"BLOCK_END",
+		"BLOCK_ENTRY",
+		"FLOW_SEQ_START",
+		"FLOW_MAP_START",
+		"FLOW_SEQ_END",
+		"FLOW_MAP_END",
+		"FLOW_ENTRY",
+		"KEY",
+		"VALUE",
+		"ANCHOR",
+		"ALIAS",
+		"TAG",
+		"SCALAR",
+	};

 	struct Token {
-		Token(): status(TS_VALID) {}
-		virtual ~Token() {}
-		virtual void Write(std::ostream& out) const {}
+		Token(TOKEN_TYPE type_): status(TS_VALID), type(type_) {}
+
+		friend std::ostream& operator << (std::ostream& out, const Token& token) {
+			out << TokenNames[token.type] << ": " << token.value;
+			for(unsigned i=0;i<token.params.size();i++)
+				out << " " << token.params[i];
+			return out;
+		}
+
+		TOKEN_STATUS status;
+		TOKEN_TYPE type;
+		std::string value;
+		std::vector <std::string> params;
-
-		virtual void Write(std::ostream& out) const { out << name; for(unsigned i=0;i