More simple scalar scanning.

This commit is contained in:
Jesse Beder 2008-06-26 22:00:39 +00:00
parent a3961d047f
commit 31f7db5a0d
5 changed files with 164 additions and 209 deletions

View file

@ -36,8 +36,9 @@ namespace YAML
try { try {
scanner.Scan(); scanner.Scan();
} catch(const UnknownToken& e) { } catch(const Exception& e) {
} }
getchar();
// if(!scanner) // if(!scanner)
// return; // return;

View file

@ -11,4 +11,5 @@ namespace YAML
class IllegalMapKey: public Exception {}; class IllegalMapKey: public Exception {};
class IllegalMapValue: public Exception {}; class IllegalMapValue: public Exception {};
class IllegalScalar: public Exception {}; class IllegalScalar: public Exception {};
class IllegalTabInScalar: public Exception {};
} }

View file

@ -1,6 +1,7 @@
#include "scanner.h" #include "scanner.h"
#include "token.h" #include "token.h"
#include "exceptions.h" #include "exceptions.h"
#include <iostream>
namespace YAML namespace YAML
{ {
@ -32,30 +33,42 @@ namespace YAML
return INPUT.get(); return INPUT.get();
} }
// Eat
// . Consumes up to 'n' characters from the input and keeps m_column in step:
//   each consumed character advances the column, and a consumed '\n'
//   resets it to 0.
// . Stops early if the input runs out, so the column is never advanced for
//   characters that were not actually consumed.
void Scanner::Eat(int n)
{
	for(int i=0;i<n;i++) {
		// keep the result as an int so that a real 0xFF byte is not mistaken for EOF
		const int ch = INPUT.get();
		if(ch == EOF)
			break;

		if(ch == '\n')
			m_column = 0;
		else
			m_column++;
	}
}
// Peek
// . Returns the next 'n' characters of the input without consuming them.
// . The result always has exactly 'n' characters (it is empty if n <= 0).
//   Positions past the end of the input hold EOF, so callers can index the
//   result freely and test those positions with IsBlank().
// . If some, but fewer than 'n', characters remain, they are put back and the
//   stream is left readable. If the input is already exhausted, the stream is
//   left in the failed state, exactly as a read would leave it.
std::string Scanner::Peek(int n)
{
	std::string ret;
	if(n <= 0)
		return ret;

	// extract up to n - 1 characters, remembering how many we really got
	int taken = 0;
	while(taken < n - 1) {
		const int ch = INPUT.get();
		if(ch == EOF)
			break;

		ret += static_cast<char>(ch);
		taken++;
	}

	// peek at the nth character, but only if the input lasted that long
	if(taken == n - 1)
		ret += static_cast<char>(INPUT.peek());

	// whatever is missing is reported as EOF
	ret.resize(n, static_cast<char>(EOF));

	if(taken > 0) {
		// Running into the end of the input sets eofbit/failbit, and putback
		// refuses to work on a failed stream. The characters we are about to
		// return are still unread input, so drop those flags first.
		INPUT.clear(INPUT.rdstate() & ~(std::ios_base::failbit | std::ios_base::eofbit));

		// and put back the characters we STOLE (in reverse order)
		for(int i=taken-1;i>=0;i--)
			INPUT.putback(ret[i]);
	}

	return ret;
}
// GetLineBreak // GetLineBreak
// . Eats with no checking // . Eats with no checking
void Scanner::EatLineBreak() void Scanner::EatLineBreak()
{ {
Eat(1);
m_column = 0; m_column = 0;
INPUT.get();
}
// EatDocumentStart
// . Eats with no checking
void Scanner::EatDocumentStart()
{
INPUT.get();
INPUT.get();
INPUT.get();
}
// EatDocumentEnd
// . Eats with no checking
void Scanner::EatDocumentEnd()
{
INPUT.get();
INPUT.get();
INPUT.get();
} }
// IsWhitespaceToBeEaten // IsWhitespaceToBeEaten
@ -65,10 +78,8 @@ namespace YAML
// a. In the flow context // a. In the flow context
// b. In the block context but not where a simple key could be allowed // b. In the block context but not where a simple key could be allowed
// (i.e., not at the beginning of a line, or following '-', '?', or ':') // (i.e., not at the beginning of a line, or following '-', '?', or ':')
bool Scanner::IsWhitespaceToBeEaten() bool Scanner::IsWhitespaceToBeEaten(char ch)
{ {
char ch = INPUT.peek();
if(ch == ' ') if(ch == ' ')
return true; return true;
@ -79,17 +90,15 @@ namespace YAML
} }
// IsLineBreak // IsLineBreak
bool Scanner::IsLineBreak() bool Scanner::IsLineBreak(char ch)
{ {
char ch = INPUT.peek();
return ch == '\n'; // TODO: More types of line breaks return ch == '\n'; // TODO: More types of line breaks
} }
// IsBlank // IsBlank
bool Scanner::IsBlank() bool Scanner::IsBlank(char ch)
{ {
char ch = INPUT.peek(); return IsLineBreak(ch) || ch == ' ' || ch == '\t' || ch == EOF;
return IsLineBreak() || ch == ' ' || ch == '\t' || ch == EOF;
} }
// IsDocumentStart // IsDocumentStart
@ -99,34 +108,8 @@ namespace YAML
if(m_column != 0) if(m_column != 0)
return false; return false;
// then needs '---' std::string next = Peek(4);
for(int i=0;i<3;i++) { return next[0] == '-' && next[1] == '-' && next[2] == '-' && IsBlank(next[3]);
if(INPUT.peek() != '-') {
// first put 'em back
for(int j=0;j<i;j++)
INPUT.putback('-');
// and return
return false;
}
INPUT.get();
}
// then needs a blank character (or eof)
if(!IsBlank()) {
// put 'em back
for(int i=0;i<3;i++)
INPUT.putback('-');
// and return
return false;
}
// finally, put 'em back and go
for(int i=0;i<3;i++)
INPUT.putback('-');
return true;
} }
// IsDocumentEnd // IsDocumentEnd
@ -136,88 +119,29 @@ namespace YAML
if(m_column != 0) if(m_column != 0)
return false; return false;
// then needs '...' std::string next = Peek(4);
for(int i=0;i<3;i++) { return next[0] == '.' && next[1] == '.' && next[2] == '.' && IsBlank(next[3]);
if(INPUT.peek() != '.') {
// first put 'em back
for(int j=0;j<i;j++)
INPUT.putback('.');
// and return
return false;
}
INPUT.get();
}
// then needs a blank character (or eof)
if(!IsBlank()) {
// put 'em back
for(int i=0;i<3;i++)
INPUT.putback('.');
// and return
return false;
}
// finally, put 'em back and go
for(int i=0;i<3;i++)
INPUT.putback('-');
return true;
} }
// IsBlockEntry // IsBlockEntry
bool Scanner::IsBlockEntry() bool Scanner::IsBlockEntry()
{ {
if(INPUT.peek() != Keys::BlockEntry) std::string next = Peek(2);
return false; return next[0] == Keys::BlockEntry && IsBlank(next[1]);
INPUT.get();
// then needs a blank character (or eof)
if(!IsBlank()) {
INPUT.putback(Keys::BlockEntry);
return false;
}
INPUT.putback(Keys::BlockEntry);
return true;
} }
// IsKey // IsKey
bool Scanner::IsKey() bool Scanner::IsKey()
{ {
if(INPUT.peek() != Keys::Key) std::string next = Peek(2);
return false; return next[0] == Keys::Key && (IsBlank(next[1]) || m_flowLevel > 0);
INPUT.get();
// then needs a blank character (or eof), if we're in block context
if(m_flowLevel == 0 && !IsBlank()) {
INPUT.putback(Keys::BlockEntry);
return false;
}
INPUT.putback(Keys::BlockEntry);
return true;
} }
// IsValue // IsValue
bool Scanner::IsValue() bool Scanner::IsValue()
{ {
if(INPUT.peek() != Keys::Value) std::string next = Peek(2);
return false; return next[0] == Keys::Value && (IsBlank(next[1]) || m_flowLevel > 0);
INPUT.get();
// then needs a blank character (or eof), if we're in block context
if(m_flowLevel == 0 && !IsBlank()) {
INPUT.putback(Keys::BlockEntry);
return false;
}
INPUT.putback(Keys::BlockEntry);
return true;
} }
// IsPlainScalar // IsPlainScalar
@ -228,34 +152,25 @@ namespace YAML
// . In the flow context ? : are illegal and - must not be followed with a space. // . In the flow context ? : are illegal and - must not be followed with a space.
bool Scanner::IsPlainScalar() bool Scanner::IsPlainScalar()
{ {
if(IsBlank()) std::string next = Peek(2);
if(IsBlank(next[0]))
return false; return false;
// never characters // never characters
std::string never = ",[]{}#&*!|>\'\"%@`"; if(std::string(",[]{}#&*!|>\'\"%@`").find(next[0]) != std::string::npos)
for(unsigned i=0;i<never.size();i++) return false;
if(INPUT.peek() == never[i])
return false;
// specific block/flow characters // specific block/flow characters
if(m_flowLevel == 0) { if(m_flowLevel == 0) {
if(INPUT.peek() == '-' || INPUT.peek() == '?' || INPUT.peek() == ':') { if((next[0] == '-' || next[0] == '?' || next[0] == ':') && IsBlank(next[1]))
char ch = INPUT.get(); return false;
if(IsBlank()) { } else {
INPUT.putback(ch); if(next[0] == '?' || next[0] == ':')
return false; return false;
}
} if(next[0] == '-' && IsBlank(next[1]))
} else {
if(INPUT.peek() == '?' || INPUT.peek() == ':')
return false; return false;
if(INPUT.peek() == '-') {
INPUT.get();
if(IsBlank()) {
INPUT.putback('-');
return false;
}
}
} }
return true; return true;
@ -311,8 +226,8 @@ namespace YAML
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// eat it // eat
EatDocumentStart(); Eat(3);
return pToken; return pToken;
} }
@ -325,8 +240,8 @@ namespace YAML
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// eat it // eat
EatDocumentEnd(); Eat(3);
return pToken; return pToken;
} }
@ -419,7 +334,7 @@ namespace YAML
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
// eat // eat
INPUT.get(); Eat(1);
return pToken; return pToken;
} }
@ -443,7 +358,7 @@ namespace YAML
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// eat // eat
INPUT.get(); Eat(1);
return pToken; return pToken;
} }
@ -453,61 +368,90 @@ namespace YAML
// TODO: Is it a simple key? // TODO: Is it a simple key?
if(false) { if(false) {
} else { } else {
// If not, ... // If not, ...
// are we in block context? // are we in block context?
if(m_flowLevel == 0) { if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed) if(!m_simpleKeyAllowed)
throw IllegalMapValue(); throw IllegalMapValue();
PushIndentTo(m_column, false); PushIndentTo(m_column, false);
} }
} }
// can only put a simple key here if we're in block context // can only put a simple key here if we're in block context
if(m_flowLevel == 0) if(m_flowLevel == 0)
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
else else
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// eat // eat
INPUT.get(); Eat(1);
return pToken; return pToken;
} }
// PlainScalarToken // PlainScalarToken
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken) template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{ {
// TODO: "save simple key" // TODO: "save simple key"
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// now eat and store the scalar // now eat and store the scalar
while(1) { std::string scalar;
// doc start/end tokens bool leadingBlanks = true;
if(IsDocumentStart() || IsDocumentEnd())
break; while(INPUT) {
// doc start/end tokens
// comment if(IsDocumentStart() || IsDocumentEnd())
if(INPUT.peek() == Keys::Comment) break;
break;
// comment
// first eat non-blanks if(INPUT.peek() == Keys::Comment)
while(!IsBlank()) { break;
// illegal colon in flow context
if(m_flowLevel > 0 && INPUT.peek() == ':') { // first eat non-blanks
INPUT.get(); while(INPUT && !IsBlank(INPUT.peek())) {
if(!IsBlank()) { std::string next = Peek(2);
INPUT.putback(':');
throw IllegalScalar(); // illegal colon in flow context
} if(m_flowLevel > 0 && next[0] == ':') {
INPUT.putback(':'); if(!IsBlank(next[1]))
} throw IllegalScalar();
}
// characters that might end the scalar
// TODO: scanner.c line 3434 // characters that might end the scalar
} if(next[0] == ':' && IsBlank(next[1]))
} break;
if(m_flowLevel > 0 && std::string(",:?[]{}").find(next[0]) != std::string::npos)
break;
scalar += GetChar();
}
// now eat blanks
while(IsBlank(INPUT.peek()) /* || IsBreak(INPUT.peek()) */) {
if(IsBlank(INPUT.peek())) {
if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
// TODO: Store some blanks?
Eat(1);
} else {
Eat(1);
}
}
// TODO: join whitespace
// and finally break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
}
// now modify our token
if(leadingBlanks)
m_simpleKeyAllowed = true;
return pToken; return pToken;
} }
@ -588,18 +532,18 @@ namespace YAML
{ {
while(1) { while(1) {
// first eat whitespace // first eat whitespace
while(IsWhitespaceToBeEaten()) while(IsWhitespaceToBeEaten(INPUT.peek()))
INPUT.get(); Eat(1);
// then eat a comment // then eat a comment
if(INPUT.peek() == Keys::Comment) { if(INPUT.peek() == Keys::Comment) {
// eat until line break // eat until line break
while(INPUT && !IsLineBreak()) while(INPUT && !IsLineBreak(INPUT.peek()))
INPUT.get(); Eat(1);
} }
// if it's NOT a line break, then we're done! // if it's NOT a line break, then we're done!
if(!IsLineBreak()) if(!IsLineBreak(INPUT.peek()))
break; break;
// otherwise, let's eat the line break and keep going // otherwise, let's eat the line break and keep going
@ -651,7 +595,15 @@ namespace YAML
// temporary function for testing // temporary function for testing
void Scanner::Scan() void Scanner::Scan()
{ {
while(INPUT) while(INPUT) {
ScanNextToken(); ScanNextToken();
while(!m_tokens.empty()) {
Token *pToken = m_tokens.front();
m_tokens.pop();
std::cout << typeid(*pToken).name() << std::endl;
delete pToken;
}
}
} }
} }

View file

@ -43,13 +43,14 @@ namespace YAML
private: private:
char GetChar(); char GetChar();
void EatLineBreak(); void Eat(int n = 1);
void EatDocumentStart(); std::string Peek(int n);
void EatDocumentEnd();
bool IsWhitespaceToBeEaten(); void EatLineBreak();
bool IsLineBreak();
bool IsBlank(); bool IsWhitespaceToBeEaten(char ch);
bool IsLineBreak(char ch);
bool IsBlank(char ch);
bool IsDocumentStart(); bool IsDocumentStart();
bool IsDocumentEnd(); bool IsDocumentEnd();
bool IsBlockEntry(); bool IsBlockEntry();

View file

@ -2,7 +2,7 @@
namespace YAML namespace YAML
{ {
class Token {}; class Token { public: virtual ~Token() {} };
class StreamStartToken: public Token {}; class StreamStartToken: public Token {};
class StreamEndToken: public Token {}; class StreamEndToken: public Token {};