Added stream input to the regular expressions, greatly simplifying the usage (in particular, we no longer have to specify the number of characters to be checked).

This commit is contained in:
Jesse Beder 2008-06-27 19:07:30 +00:00
parent 4e435b1321
commit de29068110
4 changed files with 232 additions and 85 deletions

208
regex.cpp
View file

@ -2,30 +2,60 @@
namespace YAML
{
RegEx::RegEx(REGEX_OP op): m_op(op)
// Operation constructor
// . Builds an expression of the given operation type with no operands.
// . This constructor receives no characters, so m_a/m_z are zero-initialized;
//   otherwise the copy constructor would read indeterminate values.
RegEx::RegEx(REGEX_OP op): m_op(op), m_pOp(0), m_a(0), m_z(0)
{
	SetOp();
}
RegEx::RegEx(): m_op(REGEX_EMPTY)
// Copy constructor
// . Duplicates the operation type, the character bounds and the operand list
//   of rhs, then lets SetOp() allocate a fresh operator object for this copy
//   (the operator pointer itself is never copied from rhs).
RegEx::RegEx(const RegEx& rhs): m_op(rhs.m_op), m_pOp(0), m_a(rhs.m_a), m_z(rhs.m_z), m_params(rhs.m_params)
{
	SetOp();
}
RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_a(ch)
// Default constructor
// . Builds an empty expression (REGEX_EMPTY); SetOp() installs no operator
//   for that type, so Match() reports -1 for it.
// . m_a/m_z are zero-initialized so copying the object never reads
//   indeterminate values.
RegEx::RegEx(): m_op(REGEX_EMPTY), m_pOp(0), m_a(0), m_z(0)
{
	SetOp();
}
RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_a(a), m_z(z)
// Single-character constructor
// . Builds an expression that matches exactly the character ch.
// . m_z is not used by the match operator but is zero-initialized so copying
//   the object never reads an indeterminate value.
RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_pOp(0), m_a(ch), m_z(0)
{
	SetOp();
}
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op)
// Range constructor
// . Builds an expression that matches one character in the range [a, z]
//   (the range operator rejects characters below a or above z).
RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_pOp(0), m_a(a), m_z(z)
{
SetOp();
}
// String constructor
// . Builds a composite expression of type op whose operands are
//   single-character expressions, one per character of str, in order.
// . m_a/m_z are not used by composite expressions; they are zero-initialized
//   so copying the object never reads indeterminate values.
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0), m_a(0), m_z(0)
{
	// each operand must wrap its own character (str[i]), not always the first one
	for(std::string::size_type i=0;i<str.size();i++)
		m_params.push_back(RegEx(str[i]));

	SetOp();
}
// Destructor
// . Releases the operator object allocated by SetOp(); m_pOp may be null
//   (e.g. for REGEX_EMPTY), in which case delete does nothing.
RegEx::~RegEx()
{
delete m_pOp;
}
// SetOp
// . Replaces the current operator object with one matching m_op.
// . Operation types without a dedicated operator (such as REGEX_EMPTY)
//   leave m_pOp null.
void RegEx::SetOp()
{
	// drop whatever operator was installed before
	delete m_pOp;
	m_pOp = 0;

	if(m_op == REGEX_MATCH)
		m_pOp = new MatchOperator;
	else if(m_op == REGEX_RANGE)
		m_pOp = new RangeOperator;
	else if(m_op == REGEX_OR)
		m_pOp = new OrOperator;
	else if(m_op == REGEX_NOT)
		m_pOp = new NotOperator;
	else if(m_op == REGEX_SEQ)
		m_pOp = new SeqOperator;
}
bool RegEx::Matches(char ch) const
@ -40,6 +70,11 @@ namespace YAML
return Match(str) >= 0;
}
// Matches
// . Stream overload: reports whether Match(in) succeeded, i.e. returned a
//   non-negative number of matched characters.
bool RegEx::Matches(std::istream& in) const
{
	const int matched = Match(in);
	return matched >= 0;
}
// Match
// . Matches the given string against this regular expression.
// . Returns the number of characters matched.
@ -49,44 +84,36 @@ namespace YAML
// but that of course matches zero characters).
int RegEx::Match(const std::string& str) const
{
switch(m_op) {
case REGEX_EMPTY:
if(str.empty())
return 0;
if(!m_pOp)
return -1;
case REGEX_MATCH:
if(str.empty() || str[0] != m_a)
return -1;
return 1;
case REGEX_RANGE:
if(str.empty() || m_a > str[0] || m_z < str[0])
return -1;
return 1;
case REGEX_NOT:
if(m_params.empty())
return false;
if(m_params[0].Match(str) >= 0)
return -1;
return 1;
case REGEX_OR:
for(unsigned i=0;i<m_params.size();i++) {
int n = m_params[i].Match(str);
if(n >= 0)
return n;
}
return -1;
case REGEX_SEQ:
int offset = 0;
for(unsigned i=0;i<m_params.size();i++) {
int n = m_params[i].Match(str.substr(offset));
if(n == -1)
return -1;
offset += n;
}
return offset;
return m_pOp->Match(str, *this);
//case REGEX_EMPTY:
// if(str.empty())
// return 0;
// return -1;
}
// Match
// . The stream version does the same thing as the string version;
//   REMEMBER that we only match from the start of the stream!
// . Returns the number of characters matched, or -1 if there is no match
//   (or if this expression has no operator, e.g. an empty expression).
// . Note: the istream is not a const reference, but we guarantee
//   that the pointer will be in the same spot, and we'll clear its
//   flags before we end.
int RegEx::Match(std::istream& in) const
{
	if(!m_pOp)
		return -1;

	// keep the position in the stream's own position type; squeezing it into
	// an int would truncate offsets that do not fit
	const std::istream::pos_type pos = in.tellg();
	const int ret = m_pOp->Match(in, *this);

	// reset input stream!
	in.clear();
	in.seekg(pos);

	return ret;
}
RegEx operator ! (const RegEx& ex)
@ -111,4 +138,107 @@ namespace YAML
ret.m_params.push_back(ex2);
return ret;
}
//////////////////////////////////////////////////////////////////////////////
// Operators
// MatchOperator
// . Matches one character: succeeds (1) when str is non-empty and its first
//   character equals regex.m_a, otherwise fails (-1).
int RegEx::MatchOperator::Match(const std::string& str, const RegEx& regex) const
{
	const bool hit = !str.empty() && str[0] == regex.m_a;
	return hit ? 1 : -1;
}
// . Stream version: succeeds (1) when the next character in the stream
//   equals regex.m_a, otherwise fails (-1). The character is only peeked at,
//   never extracted.
int RegEx::MatchOperator::Match(std::istream& in, const RegEx& regex) const
{
	typedef std::istream::traits_type Traits;

	if(!in)
		return -1;

	// peek() yields an int_type: end-of-stream must be ruled out explicitly,
	// and the value converted back to char before comparing, so that bytes
	// above 127 compare the same way as in the string version
	const Traits::int_type next = in.peek();
	if(Traits::eq_int_type(next, Traits::eof()))
		return -1;

	if(Traits::to_char_type(next) != regex.m_a)
		return -1;
	return 1;
}
// RangeOperator
// . Matches one character: succeeds (1) when str is non-empty and its first
//   character lies between regex.m_a and regex.m_z inclusive, otherwise
//   fails (-1).
int RegEx::RangeOperator::Match(const std::string& str, const RegEx& regex) const
{
	if(str.empty())
		return -1;

	const char first = str[0];
	const bool inRange = regex.m_a <= first && first <= regex.m_z;
	return inRange ? 1 : -1;
}
// . Stream version: succeeds (1) when the next character in the stream lies
//   between regex.m_a and regex.m_z inclusive, otherwise fails (-1). The
//   character is only peeked at, never extracted.
int RegEx::RangeOperator::Match(std::istream& in, const RegEx& regex) const
{
	typedef std::istream::traits_type Traits;

	if(!in)
		return -1;

	// peek once, rule out end-of-stream, and compare as char so the bounds
	// are applied exactly as in the string version
	const Traits::int_type next = in.peek();
	if(Traits::eq_int_type(next, Traits::eof()))
		return -1;

	const char ch = Traits::to_char_type(next);
	if(regex.m_a > ch || regex.m_z < ch)
		return -1;
	return 1;
}
// OrOperator
int RegEx::OrOperator::Match(const std::string& str, const RegEx& regex) const
{
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(str);
if(n >= 0)
return n;
}
return -1;
}
int RegEx::OrOperator::Match(std::istream& in, const RegEx& regex) const
{
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in);
if(n >= 0)
return n;
}
return -1;
}
// NotOperator
// . Negates the first operand: fails (-1) when there is no operand or when
//   the operand matches str; otherwise reports a one-character match (1).
int RegEx::NotOperator::Match(const std::string& str, const RegEx& regex) const
{
	if(regex.m_params.empty())
		return -1;

	const bool operandMatched = regex.m_params[0].Match(str) >= 0;
	return operandMatched ? -1 : 1;
}
// . Stream version: fails (-1) when there is no operand or when the operand
//   matches the stream; otherwise reports a one-character match (1).
int RegEx::NotOperator::Match(std::istream& in, const RegEx& regex) const
{
	if(regex.m_params.empty())
		return -1;

	const bool operandMatched = regex.m_params[0].Match(in) >= 0;
	return operandMatched ? -1 : 1;
}
// SeqOperator
int RegEx::SeqOperator::Match(const std::string& str, const RegEx& regex) const
{
int offset = 0;
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(str.substr(offset));
if(n == -1)
return -1;
offset += n;
}
return offset;
}
int RegEx::SeqOperator::Match(std::istream& in, const RegEx& regex) const
{
int offset = 0;
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in);
if(n == -1)
return -1;
offset += n;
in.seekg(n, std::ios_base::cur);
}
return offset;
}
}

43
regex.h
View file

@ -2,6 +2,7 @@
#include <vector>
#include <string>
#include <ios>
namespace YAML
{
@ -10,17 +11,55 @@ namespace YAML
// simplified regular expressions
// . Only straightforward matches (no repeated characters)
// . Only matches from start of string
class RegEx {
class RegEx
{
private:
// Operator
// . Abstract interface for one kind of matching operation.
// . Both overloads receive the RegEx being evaluated and return the number
//   of characters matched, or -1 when there is no match.
struct Operator {
virtual ~Operator() {}
// match against the start of a string
virtual int Match(const std::string& str, const RegEx& regex) const = 0;
// match against the upcoming characters of a stream
virtual int Match(std::istream& in, const RegEx& regex) const = 0;
};
// MatchOperator
// . Matches a single character equal to the expression's m_a.
struct MatchOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// RangeOperator
// . Matches a single character lying between the expression's m_a and m_z.
struct RangeOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// OrOperator
// . Succeeds with the first operand (in m_params order) that matches.
struct OrOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// NotOperator
// . Succeeds (as a one-character match) when the first operand does not match.
struct NotOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// SeqOperator
// . Matches all operands one after another and reports the total length.
struct SeqOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
public:
friend struct Operator;
RegEx();
RegEx(char ch);
RegEx(char a, char z);
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
RegEx(const RegEx& rhs);
~RegEx();
bool Matches(char ch) const;
bool Matches(const std::string& str) const;
bool Matches(std::istream& in) const;
int Match(const std::string& str) const;
int Match(std::istream& in) const;
friend RegEx operator ! (const RegEx& ex);
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2);
@ -28,9 +67,11 @@ namespace YAML
private:
RegEx(REGEX_OP op);
void SetOp();
private:
REGEX_OP m_op;
Operator *m_pOp;
char m_a, m_z;
std::vector <RegEx> m_params;
};

View file

@ -48,22 +48,6 @@ namespace YAML
}
}
// Peek
// . Peeks at the next 'n' characters and returns them in a string.
std::string Scanner::Peek(int n)
{
std::string ret;
int pos = INPUT.tellg();
for(int i=0;i<n;i++)
ret += INPUT.get();
INPUT.clear();
INPUT.seekg(pos);
return ret;
}
// GetLineBreak
// . Eats with no checking
void Scanner::EatLineBreak()
@ -97,7 +81,7 @@ namespace YAML
if(m_column != 0)
return false;
return Exp::DocStart.Matches(Peek(4));
return Exp::DocStart.Matches(INPUT);
}
// IsDocumentEnd
@ -107,41 +91,37 @@ namespace YAML
if(m_column != 0)
return false;
return Exp::DocEnd.Matches(Peek(4));
return Exp::DocEnd.Matches(INPUT);
}
// IsBlockEntry
bool Scanner::IsBlockEntry()
{
return Exp::BlockEntry.Matches(Peek(2));
return Exp::BlockEntry.Matches(INPUT);
}
// IsKey
bool Scanner::IsKey()
{
std::string next = Peek(2);
if(m_flowLevel > 0)
return Exp::KeyInFlow.Matches(next);
return Exp::Key.Matches(next);
return Exp::KeyInFlow.Matches(INPUT);
return Exp::Key.Matches(INPUT);
}
// IsValue
bool Scanner::IsValue()
{
std::string next = Peek(2);
if(m_flowLevel > 0)
return Exp::ValueInFlow.Matches(next);
return Exp::Value.Matches(next);
return Exp::ValueInFlow.Matches(INPUT);
return Exp::Value.Matches(INPUT);
}
// IsPlainScalar
// . Rules:
bool Scanner::IsPlainScalar()
{
std::string next = Peek(2);
if(m_flowLevel > 0)
return Exp::PlainScalarInFlow.Matches(next);
return Exp::PlainScalar.Matches(next);
return Exp::PlainScalarInFlow.Matches(INPUT);
return Exp::PlainScalar.Matches(INPUT);
}
///////////////////////////////////////////////////////////////////////
@ -368,21 +348,19 @@ namespace YAML
break;
// comment
if(Exp::Comment.Matches(INPUT.peek()))
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT.peek())) {
std::string next = Peek(2);
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(next))
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(next))
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(next))
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
if(leadingBlanks) {
@ -409,12 +387,12 @@ namespace YAML
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT.peek()))
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT.peek())) {
if(Exp::Blank.Matches(INPUT.peek())) {
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();

View file

@ -71,8 +71,6 @@ namespace YAML
private:
char GetChar();
void Eat(int n = 1);
std::string Peek(int n);
void EatLineBreak();
bool IsWhitespaceToBeEaten(char ch);