Moved the input stream, together with its line/column info, into its own class, which lets some other functions simply take the stream as a parameter instead of having to be members of Scanner.

This commit is contained in:
Jesse Beder 2008-06-29 06:32:13 +00:00
parent 0d5a97bffe
commit ff99f85a6d
13 changed files with 162 additions and 142 deletions

21
exp.cpp
View file

@ -34,13 +34,12 @@ namespace YAML
// Escape
// . Translates the next 'codeLength' characters into a hex number and returns the result.
// . Throws if it's not actually hex.
std::string Escape(std::istream& in, int& length, int codeLength)
std::string Escape(Stream& in, int codeLength)
{
// grab string
length += codeLength;
std::string str;
for(int i=0;i<codeLength;i++)
str += in.get();
str += in.GetChar();
// get the value
unsigned value = ParseHex(str);
@ -64,18 +63,14 @@ namespace YAML
// Escape
// . Escapes the sequence starting 'in' (it must begin with a '\' or single quote)
// and returns the result.
// . Fills 'length' with how many characters we ate.
// . Throws if it's an unknown escape character.
std::string Escape(std::istream& in, int& length)
std::string Escape(Stream& in)
{
// slash + character
length = 2;
// eat slash
char escape = in.get();
char escape = in.GetChar();
// switch on escape character
char ch = in.get();
char ch = in.GetChar();
// first do single quote, since it's easier
if(escape == '\'' && ch == '\'')
@ -101,9 +96,9 @@ namespace YAML
case '_': return "\xC2\xA0"; // #xA0
case 'L': return "\xE2\x80\xA8"; // LS (#x2028)
case 'P': return "\xE2\x80\xA9"; // PS (#x2029)
case 'x': return Escape(in, length, 2);
case 'u': return Escape(in, length, 4);
case 'U': return Escape(in, length, 8);
case 'x': return Escape(in, 2);
case 'u': return Escape(in, 4);
case 'U': return Escape(in, 8);
}
throw UnknownEscapeSequence(ch);

3
exp.h
View file

@ -3,6 +3,7 @@
#include "regex.h"
#include <string>
#include <ios>
#include "stream.h"
namespace YAML
{
@ -50,7 +51,7 @@ namespace YAML
const RegEx Chomp = (ChompIndicator + Digit) || (Digit + ChompIndicator) || ChompIndicator || Digit;
// and some functions
std::string Escape(std::istream& in, int& length);
std::string Escape(Stream& in);
}
namespace Keys

View file

@ -3,22 +3,6 @@
int main()
{
YAML::RegEx alpha = YAML::RegEx('a', 'z') || YAML::RegEx('A', 'Z');
alpha.Matches("a");
alpha.Matches("d");
alpha.Matches("F");
alpha.Matches("0");
alpha.Matches("5");
alpha.Matches(" ");
YAML::RegEx blank = YAML::RegEx(' ') || YAML::RegEx('\t');
YAML::RegEx docstart = YAML::RegEx("---") + (blank || YAML::RegEx(EOF) || YAML::RegEx());
docstart.Matches("--- ");
docstart.Matches("... ");
docstart.Matches("----");
docstart.Matches("---\t");
docstart.Matches("---");
YAML::Document doc("test.yaml");
return 0;

View file

@ -7,8 +7,7 @@
namespace YAML
{
Scanner::Scanner(std::istream& in)
: INPUT(in), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_flowLevel(0),
m_line(0), m_column(0)
: INPUT(in), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_flowLevel(0)
{
}
@ -27,45 +26,6 @@ namespace YAML
///////////////////////////////////////////////////////////////////////
// Misc. helpers
// GetChar
// . Extracts a character from the stream and updates our position
char Scanner::GetChar()
{
char ch = INPUT.get();
m_column++;
if(ch == '\n') {
m_column = 0;
m_line++;
}
return ch;
}
// GetChar
// . Extracts 'n' characters from the stream and updates our position
std::string Scanner::GetChar(int n)
{
std::string ret;
for(int i=0;i<n;i++)
ret += GetChar();
return ret;
}
// Eat
// . Eats 'n' characters and updates our position.
void Scanner::Eat(int n)
{
for(int i=0;i<n;i++)
GetChar();
}
// GetLineBreak
// . Eats with no checking
void Scanner::EatLineBreak()
{
Eat(1);
m_column = 0;
}
// IsWhitespaceToBeEaten
// . We can eat whitespace if:
// 1. It's a space
@ -88,7 +48,7 @@ namespace YAML
bool Scanner::IsDocumentStart()
{
// needs to be at the start of a new line
if(m_column != 0)
if(INPUT.column != 0)
return false;
return Exp::DocStart.Matches(INPUT);
@ -98,7 +58,7 @@ namespace YAML
bool Scanner::IsDocumentEnd()
{
// needs to be at the start of a new line
if(m_column != 0)
if(INPUT.column != 0)
return false;
return Exp::DocEnd.Matches(INPUT);
@ -166,7 +126,7 @@ namespace YAML
ValidateSimpleKey();
// maybe need to end some blocks
PopIndentTo(m_column);
PopIndentTo(INPUT.column);
// *****
// And now branch based on the next few characters!
@ -236,13 +196,13 @@ namespace YAML
while(1) {
// first eat whitespace
while(IsWhitespaceToBeEaten(INPUT.peek()))
Eat(1);
INPUT.Eat(1);
// then eat a comment
if(Exp::Comment.Matches(INPUT)) {
// eat until line break
while(INPUT && !Exp::Break.Matches(INPUT))
Eat(1);
INPUT.Eat(1);
}
// if it's NOT a line break, then we're done!
@ -250,7 +210,7 @@ namespace YAML
break;
// otherwise, let's eat the line break and keep going
EatLineBreak();
INPUT.EatLineBreak();
// oh yeah, and let's get rid of that simple key
ValidateSimpleKey();

View file

@ -6,6 +6,7 @@
#include <stack>
#include <set>
#include "regex.h"
#include "stream.h"
namespace YAML
{
@ -31,11 +32,6 @@ namespace YAML
void Scan();
private:
char GetChar();
std::string GetChar(int n);
void Eat(int n = 1);
void EatLineBreak();
bool IsWhitespaceToBeEaten(char ch);
bool IsDocumentStart();
bool IsDocumentEnd();
@ -44,9 +40,6 @@ namespace YAML
bool IsValue();
bool IsPlainScalar();
void GetBlockIndentation(int& indent, std::string& breaks);
std::string ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp);
struct SimpleKey {
SimpleKey(int pos_, int line_, int column_, int flowLevel_);
@ -63,8 +56,7 @@ namespace YAML
private:
// the stream
std::istream& INPUT;
int m_line, m_column;
Stream INPUT;
// the output (tokens)
std::queue <Token *> m_tokens;

View file

@ -142,7 +142,7 @@ namespace YAML
if(m_simpleKeyAllowed)
InsertSimpleKey();
pToken->value = ScanScalar(end, false, indent, 0, true, true, true, 0);
pToken->value = ScanScalar(INPUT, end, false, indent, 0, true, true, true, 0);
m_simpleKeyAllowed = false;
if(true/*info.leadingBlanks*/)
@ -221,7 +221,7 @@ namespace YAML
//}
// eat single or double quote
char quote = GetChar();
char quote = INPUT.GetChar();
pToken->single = (quote == '\'');
RegEx end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
@ -231,7 +231,7 @@ namespace YAML
if(m_simpleKeyAllowed)
InsertSimpleKey();
pToken->value = ScanScalar(end, true, 0, escape, true, true, false, 0);
pToken->value = ScanScalar(INPUT, end, true, 0, escape, true, true, false, 0);
m_simpleKeyAllowed = false;
return pToken;
@ -243,39 +243,39 @@ namespace YAML
WhitespaceInfo info;
// eat block indicator ('|' or '>')
char indicator = GetChar();
char indicator = INPUT.GetChar();
info.fold = (indicator == Keys::FoldedScalar);
// eat chomping/indentation indicators
int n = Exp::Chomp.Match(INPUT);
for(int i=0;i<n;i++)
info.SetChompers(GetChar());
info.SetChompers(INPUT.GetChar());
// first eat whitespace
while(Exp::Blank.Matches(INPUT))
Eat(1);
INPUT.Eat(1);
// and comments to the end of the line
if(Exp::Comment.Matches(INPUT))
while(INPUT && !Exp::Break.Matches(INPUT))
Eat(1);
INPUT.Eat(1);
// if it's not a line break, then we ran into a bad character inline
if(INPUT && !Exp::Break.Matches(INPUT))
throw UnexpectedCharacterInBlockScalar();
// and eat that baby
EatLineBreak();
INPUT.EatLineBreak();
// set the initial indentation
int indent = info.increment;
if(info.increment && m_indents.top() >= 0)
indent += m_indents.top();
GetBlockIndentation(indent, info.trailingBreaks);
GetBlockIndentation(INPUT, indent, info.trailingBreaks, m_indents.top());
bool eatLeadingWhitespace = false;
pToken->value = ScanScalar(RegEx(), false, indent, 0, info.fold, eatLeadingWhitespace, false, info.chomp);
pToken->value = ScanScalar(INPUT, RegEx(), false, indent, 0, info.fold, eatLeadingWhitespace, false, info.chomp);
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
@ -286,20 +286,20 @@ namespace YAML
// . Helper to scanning a block scalar.
// . Eats leading *indentation* zeros (i.e., those that come before 'indent'),
// and updates 'indent' (if it hasn't been set yet).
void Scanner::GetBlockIndentation(int& indent, std::string& breaks)
void GetBlockIndentation(Stream& INPUT, int& indent, std::string& breaks, int topIndent)
{
int maxIndent = 0;
while(1) {
// eat as many indentation spaces as we can
while((indent == 0 || m_column < indent) && INPUT.peek() == ' ')
Eat(1);
while((indent == 0 || INPUT.column < indent) && INPUT.peek() == ' ')
INPUT.Eat(1);
if(m_column > maxIndent)
maxIndent = m_column;
if(INPUT.column > maxIndent)
maxIndent = INPUT.column;
// do we need more indentation, but we've got a tab?
if((indent == 0 || m_column < indent) && INPUT.peek() == '\t')
if((indent == 0 || INPUT.column < indent) && INPUT.peek() == '\t')
throw IllegalTabInScalar(); // TODO: are literal scalar lines allowed to have tabs here?
// is this a non-empty line?
@ -308,21 +308,21 @@ namespace YAML
// otherwise, eat the line break and move on
int n = Exp::Break.Match(INPUT);
breaks += GetChar(n);
breaks += INPUT.GetChar(n);
}
// finally, set the indentation
if(indent == 0) {
indent = maxIndent;
if(indent < m_indents.top() + 1)
indent = m_indents.top() + 1;
if(indent < topIndent + 1)
indent = topIndent + 1;
if(indent < 1)
indent = 1;
}
}
// ScanScalar
std::string Scanner::ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp)
std::string ScanScalar(Stream& INPUT, RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp)
{
bool emptyLine = false, moreIndented = false;
std::string scalar;
@ -337,20 +337,18 @@ namespace YAML
// escaped newline? (only if we're escaping on slash)
if(escape == '\\' && Exp::EscBreak.Matches(INPUT)) {
int n = Exp::EscBreak.Match(INPUT);
Eat(n);
INPUT.Eat(n);
continue;
}
// escape this?
if(INPUT.peek() == escape) {
int length = 0;
scalar += Exp::Escape(INPUT, length);
m_column += length;
scalar += Exp::Escape(INPUT);
continue;
}
// otherwise, just add the damn character
scalar += GetChar();
scalar += INPUT.GetChar();
}
// eof? if we're looking to eat something, then we throw
@ -364,26 +362,26 @@ namespace YAML
int n = end.Match(INPUT);
if(n >= 0) {
if(eatEnd)
Eat(n);
INPUT.Eat(n);
break;
}
// ********************************
// Phase #2: eat line ending
n = Exp::Break.Match(INPUT);
Eat(n);
INPUT.Eat(n);
// ********************************
// Phase #3: scan initial spaces
// first the required indentation
while(INPUT.peek() == ' ' && m_column < indent)
Eat(1);
while(INPUT.peek() == ' ' && INPUT.column < indent)
INPUT.Eat(1);
// and then the rest of the whitespace
if(eatLeadingWhitespace) {
while(Exp::Blank.Matches(INPUT))
Eat(1);
INPUT.Eat(1);
}
// was this an empty line?
@ -399,7 +397,7 @@ namespace YAML
moreIndented = nextMoreIndented;
// are we done via indentation?
if(!emptyLine && m_column < indent)
if(!emptyLine && INPUT.column < indent)
break;
}

View file

@ -1,9 +1,14 @@
#pragma once
#include <string>
#include "regex.h"
#include "stream.h"
namespace YAML
{
void GetBlockIndentation(Stream& INPUT, int& indent, std::string& breaks, int topIndent);
std::string ScanScalar(Stream& INPUT, RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp);
struct WhitespaceInfo {
WhitespaceInfo();

View file

@ -22,8 +22,8 @@ namespace YAML
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
// force newline
if(m_column > 0)
m_column = 0;
if(INPUT.column > 0)
INPUT.column = 0;
PopIndentTo(-1);
ValidateAllSimpleKeys();
@ -37,12 +37,12 @@ namespace YAML
// DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
PopIndentTo(m_column);
PopIndentTo(INPUT.column);
ValidateAllSimpleKeys();
m_simpleKeyAllowed = false;
// eat
Eat(3);
INPUT.Eat(3);
return pToken;
}
@ -54,7 +54,7 @@ namespace YAML
m_simpleKeyAllowed = false;
// eat
Eat(3);
INPUT.Eat(3);
return pToken;
}
@ -67,7 +67,7 @@ namespace YAML
m_simpleKeyAllowed = true;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -80,7 +80,7 @@ namespace YAML
m_simpleKeyAllowed = true;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -94,7 +94,7 @@ namespace YAML
m_simpleKeyAllowed = false;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -108,7 +108,7 @@ namespace YAML
m_simpleKeyAllowed = false;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -118,7 +118,7 @@ namespace YAML
m_simpleKeyAllowed = true;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -133,11 +133,11 @@ namespace YAML
if(!m_simpleKeyAllowed)
throw IllegalBlockEntry();
PushIndentTo(m_column, true);
PushIndentTo(INPUT.column, true);
m_simpleKeyAllowed = true;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -149,7 +149,7 @@ namespace YAML
if(!m_simpleKeyAllowed)
throw IllegalMapKey();
PushIndentTo(m_column, false);
PushIndentTo(INPUT.column, false);
}
// can only put a simple key here if we're in block context
@ -159,7 +159,7 @@ namespace YAML
m_simpleKeyAllowed = false;
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -176,7 +176,7 @@ namespace YAML
if(!m_simpleKeyAllowed)
throw IllegalMapValue();
PushIndentTo(m_column, false);
PushIndentTo(INPUT.column, false);
}
// can only put a simple key here if we're in block context
@ -187,7 +187,7 @@ namespace YAML
}
// eat
Eat(1);
INPUT.Eat(1);
return pToken;
}
@ -200,13 +200,13 @@ namespace YAML
m_simpleKeyAllowed = false;
// eat the indicator
char indicator = GetChar();
char indicator = INPUT.GetChar();
pToken->alias = (indicator == Keys::Alias);
// now eat the content
std::string tag;
while(Exp::AlphaNumeric.Matches(INPUT))
tag += GetChar();
tag += INPUT.GetChar();
// we need to have read SOMETHING!
if(tag.empty())

View file

@ -34,10 +34,10 @@ namespace YAML
// and saves it on a stack.
void Scanner::InsertSimpleKey()
{
SimpleKey key(INPUT.tellg(), m_line, m_column, m_flowLevel);
SimpleKey key(INPUT.pos(), INPUT.line, INPUT.column, m_flowLevel);
// first add a map start, if necessary
key.pMapStart = PushIndentTo(m_column, false);
key.pMapStart = PushIndentTo(INPUT.column, false);
if(key.pMapStart)
key.pMapStart->isValid = false;
// else
@ -79,7 +79,7 @@ namespace YAML
isValid = false;
// also needs to be less than 1024 characters and inline
if(m_line != key.line || (int) INPUT.tellg() - key.pos > 1024)
if(INPUT.line != key.line || INPUT.pos() - key.pos > 1024)
isValid = false;
// invalidate key

43
stream.cpp Normal file
View file

@ -0,0 +1,43 @@
#include "stream.h"
namespace YAML
{
// GetChar
// . Pulls the next character out of the underlying stream and keeps the
//   line/column bookkeeping in step with it.
// . No end-of-stream check is made here.
char Stream::GetChar()
{
	const char next = input.get();

	if(next == '\n') {
		// a newline moves us to the start of the following line
		line++;
		column = 0;
	} else {
		column++;
	}

	return next;
}
// GetChar
// . Pulls 'n' characters out of the stream, one at a time through the
//   single-character GetChar so the position stays up to date, and returns
//   them in the order they were read.
// . A non-positive 'n' yields an empty string.
std::string Stream::GetChar(int n)
{
	std::string chars;
	for(int left = n; left > 0; --left)
		chars += GetChar();

	return chars;
}
// Eat
// . Discards the next 'n' characters, updating our position as each one
//   is consumed.
void Stream::Eat(int n)
{
	for(int left = n; left > 0; --left)
		GetChar();
}
// GetLineBreak
// . Eats with no checking
void Stream::EatLineBreak()
{
Eat(1);
column = 0;
}
}

26
stream.h Normal file
View file

@ -0,0 +1,26 @@
#pragma once
#include <ios>
#include <istream>
#include <string>
namespace YAML
{
// Stream
// . Thin wrapper around a std::istream that also tracks the current line
//   and column (both start at zero) as characters are consumed.
// . The istream is held by reference, so it must outlive the Stream.
struct Stream
{
	Stream(std::istream& input_): input(input_), line(0), column(0) {}

	// next character without consuming it (the int from istream::peek is narrowed to char)
	char peek() { return input.peek(); }
	// current read offset as reported by istream::tellg, narrowed to int
	int pos() const { return input.tellg(); }
	// lets a Stream be handed to code that still expects a raw std::istream
	operator std::istream& () { return input; }
	// true only while the underlying stream is good(), i.e. no eof/fail/bad bit is set
	operator bool() { return input.good(); }
	// exact complement of operator bool; previously this tested fail() only, so at
	// plain end-of-file both 'if(stream)' and 'if(!stream)' evaluated to false
	bool operator !() { return !input.good(); }

	// consuming operations; these are the ones that update line/column
	char GetChar();
	std::string GetChar(int n);
	void Eat(int n = 1);
	void EatLineBreak();

	std::istream& input;
	int line, column;
};
}

View file

@ -1,4 +1,12 @@
---
- "quoted scalar that contains
---
the document start!"
- "quoted scalar\twith a tab\nand a newline"
- 'This is Jesse''s single quote!'
- |
here's a literal:
#include <iostream>
int main()
{
std::cout << "Hello World!\n";
return 0;
}

View file

@ -217,6 +217,10 @@
RelativePath=".\simplekey.cpp"
>
</File>
<File
RelativePath=".\stream.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
@ -271,6 +275,10 @@
RelativePath=".\sequence.h"
>
</File>
<File
RelativePath=".\stream.h"
>
</File>
<File
RelativePath=".\token.h"
>