Added stream input to the regular expressions, greatly simplifying the usage (in particular, we no longer have to specify the number of characters to be checked).

This commit is contained in:
Jesse Beder 2008-06-27 19:07:30 +00:00
parent 4e435b1321
commit de29068110
4 changed files with 232 additions and 85 deletions

214
regex.cpp
View file

@ -2,30 +2,60 @@
namespace YAML namespace YAML
{ {
RegEx::RegEx(REGEX_OP op): m_op(op) RegEx::RegEx(REGEX_OP op): m_op(op), m_pOp(0)
{ {
SetOp();
} }
RegEx::RegEx(): m_op(REGEX_EMPTY) RegEx::RegEx(const RegEx& rhs): m_pOp(0)
{ {
m_op = rhs.m_op;
m_a = rhs.m_a;
m_z = rhs.m_z;
m_params = rhs.m_params;
SetOp();
} }
RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_a(ch) RegEx::RegEx(): m_op(REGEX_EMPTY), m_pOp(0)
{ {
SetOp();
} }
RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_a(a), m_z(z) RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_pOp(0), m_a(ch)
{ {
SetOp();
} }
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op) RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_pOp(0), m_a(a), m_z(z)
{
SetOp();
}
// . Builds a compound expression using operator 'op': one single-character
//   sub-expression is pushed onto m_params per character position of 'str',
//   then SetOp() selects the operator object for 'op'.
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0)
{ {
for(unsigned i=0;i<str.size();i++) for(unsigned i=0;i<str.size();i++)
// NOTE(review): every iteration pushes RegEx(str[0]); the loop index 'i' is
// never used to index 'str', so all parameters are built from the first
// character. This looks like it was meant to be str[i] - confirm and fix.
m_params.push_back(RegEx(str[0])); m_params.push_back(RegEx(str[0]));
SetOp();
} }
RegEx::~RegEx() RegEx::~RegEx()
{ {
delete m_pOp;
}
// SetOp
// . Deletes the current operator object (if any) and allocates a new one
//   chosen by m_op.
// . The switch has no case for REGEX_EMPTY (or any value not listed below),
//   so m_pOp stays null for those.
// NOTE(review): m_pOp is a raw pointer that is deleted both here and in
// ~RegEx(). A copy constructor is visible for this class, but no
// copy-assignment operator is visible from here - confirm that assigning one
// RegEx to another cannot leave two objects deleting the same operator.
void RegEx::SetOp()
{
delete m_pOp;
m_pOp = 0;
switch(m_op) {
case REGEX_MATCH: m_pOp = new MatchOperator; break;
case REGEX_RANGE: m_pOp = new RangeOperator; break;
case REGEX_OR: m_pOp = new OrOperator; break;
case REGEX_NOT: m_pOp = new NotOperator; break;
case REGEX_SEQ: m_pOp = new SeqOperator; break;
}
} }
bool RegEx::Matches(char ch) const bool RegEx::Matches(char ch) const
@ -40,6 +70,11 @@ namespace YAML
return Match(str) >= 0; return Match(str) >= 0;
} }
// Matches
// . Stream form: true when Match(in) reports a non-negative match length.
bool RegEx::Matches(std::istream& in) const
{
    const int matched = Match(in);
    return matched >= 0;
}
// Match // Match
// . Matches the given string against this regular expression. // . Matches the given string against this regular expression.
// . Returns the number of characters matched. // . Returns the number of characters matched.
@ -49,44 +84,36 @@ namespace YAML
// but that of course matches zero characters). // but that of course matches zero characters).
int RegEx::Match(const std::string& str) const int RegEx::Match(const std::string& str) const
{ {
switch(m_op) { if(!m_pOp)
case REGEX_EMPTY: return -1;
if(str.empty())
return 0;
return -1;
case REGEX_MATCH:
if(str.empty() || str[0] != m_a)
return -1;
return 1;
case REGEX_RANGE:
if(str.empty() || m_a > str[0] || m_z < str[0])
return -1;
return 1;
case REGEX_NOT:
if(m_params.empty())
return false;
if(m_params[0].Match(str) >= 0)
return -1;
return 1;
case REGEX_OR:
for(unsigned i=0;i<m_params.size();i++) {
int n = m_params[i].Match(str);
if(n >= 0)
return n;
}
return -1;
case REGEX_SEQ:
int offset = 0;
for(unsigned i=0;i<m_params.size();i++) {
int n = m_params[i].Match(str.substr(offset));
if(n == -1)
return -1;
offset += n;
}
return offset;
}
return -1; return m_pOp->Match(str, *this);
//case REGEX_EMPTY:
// if(str.empty())
// return 0;
// return -1;
}
// Match
// . The stream version does the same thing as the string version;
//   REMEMBER that we only match from the start of the stream!
// . Returns -1 when no operator object is set (m_pOp is null); otherwise
//   returns whatever the operator's stream Match returns.
// . Note: the istream is not a const reference, but once the operator has
//   run we clear the stream's state flags and seek back to the position
//   that tellg() reported on entry.
int RegEx::Match(std::istream& in) const
{
if(!m_pOp)
return -1;
// NOTE(review): tellg() returns std::istream::pos_type; storing it in an
// int narrows the position - consider std::streampos here.
int pos = in.tellg();
int ret = m_pOp->Match(in, *this);
// reset input stream!
in.clear();
in.seekg(pos);
return ret;
} }
RegEx operator ! (const RegEx& ex) RegEx operator ! (const RegEx& ex)
@ -111,4 +138,107 @@ namespace YAML
ret.m_params.push_back(ex2); ret.m_params.push_back(ex2);
return ret; return ret;
} }
//////////////////////////////////////////////////////////////////////////////
// Operators
// MatchOperator
// . String form: succeeds with a length of 1 only when 'str' is non-empty
//   and its first character equals regex.m_a; otherwise returns -1.
int RegEx::MatchOperator::Match(const std::string& str, const RegEx& regex) const
{
    const bool hit = !str.empty() && str[0] == regex.m_a;
    return hit ? 1 : -1;
}
// . Stream form: looks at the next character without consuming it.
// . Returns 1 when that character equals regex.m_a, and -1 otherwise
//   (including a failed stream or end of input).
// . peek() yields an int_type holding the unsigned-char value of the next
//   byte (or eof()), so it is converted back to char before the comparison.
//   Comparing the raw int against a plain char never matches bytes >= 0x80
//   where char is signed, and can mistake eof() for char(-1).
int RegEx::MatchOperator::Match(std::istream& in, const RegEx& regex) const
{
    typedef std::istream::traits_type Traits;

    if(!in)
        return -1;

    const Traits::int_type next = in.peek();
    if(Traits::eq_int_type(next, Traits::eof()))
        return -1;

    return (Traits::to_char_type(next) == regex.m_a) ? 1 : -1;
}
// RangeOperator
// . String form: succeeds with a length of 1 only when 'str' is non-empty
//   and its first character lies in [regex.m_a, regex.m_z]; otherwise -1.
int RegEx::RangeOperator::Match(const std::string& str, const RegEx& regex) const
{
    if(str.empty())
        return -1;

    const char first = str[0];
    const bool inRange = regex.m_a <= first && first <= regex.m_z;
    return inRange ? 1 : -1;
}
// . Stream form: looks at the next character without consuming it.
// . Returns 1 when that character lies in [regex.m_a, regex.m_z], and -1
//   otherwise (including a failed stream or end of input).
// . The stream is peeked once and the int_type result is converted back to
//   char, so the bounds are compared char-to-char exactly as in the string
//   version; comparing the raw int from peek() against plain chars gives
//   different answers for bytes >= 0x80 where char is signed.
int RegEx::RangeOperator::Match(std::istream& in, const RegEx& regex) const
{
    typedef std::istream::traits_type Traits;

    if(!in)
        return -1;

    const Traits::int_type next = in.peek();
    if(Traits::eq_int_type(next, Traits::eof()))
        return -1;

    const char ch = Traits::to_char_type(next);
    if(regex.m_a > ch || regex.m_z < ch)
        return -1;
    return 1;
}
// OrOperator
int RegEx::OrOperator::Match(const std::string& str, const RegEx& regex) const
{
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(str);
if(n >= 0)
return n;
}
return -1;
}
int RegEx::OrOperator::Match(std::istream& in, const RegEx& regex) const
{
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in);
if(n >= 0)
return n;
}
return -1;
}
// NotOperator
// . String form: succeeds with a length of 1 when the first parameter does
//   NOT match 'str'.
// . Returns -1 when there is no parameter, when the parameter matches, or
//   when 'str' is empty: a successful negation reports one consumed
//   character, so there has to be a character to consume. (Without the
//   empty check, a sequence containing this operator could count a
//   character that does not exist.)
int RegEx::NotOperator::Match(const std::string& str, const RegEx& regex) const
{
    if(regex.m_params.empty() || str.empty())
        return -1;
    if(regex.m_params[0].Match(str) >= 0)
        return -1;
    return 1;
}
// . Stream form: succeeds with a length of 1 when the first parameter does
//   NOT match at the stream's current position.
// . Returns -1 when there is no parameter, when the parameter matches, or
//   when the stream has failed or is at end of input: a successful negation
//   reports one consumed character, so there has to be a character left.
int RegEx::NotOperator::Match(std::istream& in, const RegEx& regex) const
{
    typedef std::istream::traits_type Traits;

    if(regex.m_params.empty())
        return -1;
    // nothing left to read, so there is no character for the negation to consume
    if(!in || Traits::eq_int_type(in.peek(), Traits::eof()))
        return -1;
    if(regex.m_params[0].Match(in) >= 0)
        return -1;
    return 1;
}
// SeqOperator
// . String form: matches each parameter in turn, each one starting where
//   the previous one stopped.
// . Returns the total number of characters matched, or -1 as soon as any
//   parameter fails.
// . A parameter may report more characters than actually remain (a
//   negation reports 1 even for an empty remainder), which would push the
//   offset past the end of 'str' and make substr() throw std::out_of_range.
//   Such an overshoot is treated as a failed match instead.
int RegEx::SeqOperator::Match(const std::string& str, const RegEx& regex) const
{
    std::string::size_type offset = 0;
    for(unsigned i=0;i<regex.m_params.size();i++) {
        // offset <= str.size() holds here, so substr() cannot throw
        const int n = regex.m_params[i].Match(str.substr(offset));
        if(n < 0)
            return -1;
        offset += n;
        if(offset > str.size())
            return -1;
    }
    return static_cast<int>(offset);
}
int RegEx::SeqOperator::Match(std::istream& in, const RegEx& regex) const
{
int offset = 0;
for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in);
if(n == -1)
return -1;
offset += n;
in.seekg(n, std::ios_base::cur);
}
return offset;
}
} }

45
regex.h
View file

@ -2,6 +2,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <ios>
namespace YAML namespace YAML
{ {
@ -10,17 +11,55 @@ namespace YAML
// simplified regular expressions // simplified regular expressions
// . Only straightforward matches (no repeated characters) // . Only straightforward matches (no repeated characters)
// . Only matches from start of string // . Only matches from start of string
class RegEx { class RegEx
{
private:
// Operator
// . Abstract interface for one kind of regex operation; RegEx holds a
//   pointer to one of these (m_pOp) and forwards its Match calls to it.
// . Each Match receives the RegEx being evaluated as 'regex' and returns an
//   int: the implementations return -1 for no match, otherwise a count of
//   characters matched.
struct Operator {
// virtual so that deleting through an Operator pointer is well-defined
virtual ~Operator() {}
// match against the start of a string
virtual int Match(const std::string& str, const RegEx& regex) const = 0;
// match against the stream's current read position
virtual int Match(std::istream& in, const RegEx& regex) const = 0;
};
// MatchOperator
// . Matches a single character equal to regex.m_a (used for REGEX_MATCH).
struct MatchOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// RangeOperator
// . Matches a single character between regex.m_a and regex.m_z inclusive
//   (used for REGEX_RANGE).
struct RangeOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// OrOperator
// . Matches if any entry of regex.m_params matches; the first one that
//   matches supplies the result (used for REGEX_OR).
struct OrOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// NotOperator
// . Matches when the first entry of regex.m_params does not match
//   (used for REGEX_NOT).
struct NotOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// SeqOperator
// . Matches the entries of regex.m_params one after another and adds up
//   their match lengths (used for REGEX_SEQ).
struct SeqOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
};
public: public:
friend struct Operator;
RegEx(); RegEx();
RegEx(char ch); RegEx(char ch);
RegEx(char a, char z); RegEx(char a, char z);
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ); RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
RegEx(const RegEx& rhs);
~RegEx(); ~RegEx();
bool Matches(char ch) const; bool Matches(char ch) const;
bool Matches(const std::string& str) const; bool Matches(const std::string& str) const;
bool Matches(std::istream& in) const;
int Match(const std::string& str) const; int Match(const std::string& str) const;
int Match(std::istream& in) const;
friend RegEx operator ! (const RegEx& ex); friend RegEx operator ! (const RegEx& ex);
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2); friend RegEx operator || (const RegEx& ex1, const RegEx& ex2);
@ -28,9 +67,11 @@ namespace YAML
private: private:
RegEx(REGEX_OP op); RegEx(REGEX_OP op);
void SetOp();
private: private:
REGEX_OP m_op; REGEX_OP m_op;
Operator *m_pOp;
char m_a, m_z; char m_a, m_z;
std::vector <RegEx> m_params; std::vector <RegEx> m_params;
}; };

View file

@ -48,22 +48,6 @@ namespace YAML
} }
} }
// Peek
// . Peeks at the next 'n' characters and returns them in a string.
// . Afterwards the stream's state flags are cleared and its read position
//   is put back where it was on entry.
// . Note: the loop does not check for end of input, so the result always
//   has 'n' entries; whatever get() yields past the end is appended too.
std::string Scanner::Peek(int n)
{
    std::string ret;

    // keep the position in the stream's own position type; an int would
    // narrow it
    const std::streampos pos = INPUT.tellg();
    for(int i=0;i<n;i++)
        ret += INPUT.get();

    INPUT.clear();
    INPUT.seekg(pos);
    return ret;
}
// GetLineBreak // GetLineBreak
// . Eats with no checking // . Eats with no checking
void Scanner::EatLineBreak() void Scanner::EatLineBreak()
@ -97,7 +81,7 @@ namespace YAML
if(m_column != 0) if(m_column != 0)
return false; return false;
return Exp::DocStart.Matches(Peek(4)); return Exp::DocStart.Matches(INPUT);
} }
// IsDocumentEnd // IsDocumentEnd
@ -107,41 +91,37 @@ namespace YAML
if(m_column != 0) if(m_column != 0)
return false; return false;
return Exp::DocEnd.Matches(Peek(4)); return Exp::DocEnd.Matches(INPUT);
} }
// IsBlockEntry // IsBlockEntry
bool Scanner::IsBlockEntry() bool Scanner::IsBlockEntry()
{ {
return Exp::BlockEntry.Matches(Peek(2)); return Exp::BlockEntry.Matches(INPUT);
} }
// IsKey // IsKey
bool Scanner::IsKey() bool Scanner::IsKey()
{ {
std::string next = Peek(2);
if(m_flowLevel > 0) if(m_flowLevel > 0)
return Exp::KeyInFlow.Matches(next); return Exp::KeyInFlow.Matches(INPUT);
return Exp::Key.Matches(next); return Exp::Key.Matches(INPUT);
} }
// IsValue // IsValue
bool Scanner::IsValue() bool Scanner::IsValue()
{ {
std::string next = Peek(2);
if(m_flowLevel > 0) if(m_flowLevel > 0)
return Exp::ValueInFlow.Matches(next); return Exp::ValueInFlow.Matches(INPUT);
return Exp::Value.Matches(next); return Exp::Value.Matches(INPUT);
} }
// IsPlainScalar // IsPlainScalar
// . Rules:
bool Scanner::IsPlainScalar() bool Scanner::IsPlainScalar()
{ {
std::string next = Peek(2);
if(m_flowLevel > 0) if(m_flowLevel > 0)
return Exp::PlainScalarInFlow.Matches(next); return Exp::PlainScalarInFlow.Matches(INPUT);
return Exp::PlainScalar.Matches(next); return Exp::PlainScalar.Matches(INPUT);
} }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
@ -368,21 +348,19 @@ namespace YAML
break; break;
// comment // comment
if(Exp::Comment.Matches(INPUT.peek())) if(Exp::Comment.Matches(INPUT))
break; break;
// first eat non-blanks // first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT.peek())) { while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
std::string next = Peek(2);
// illegal colon in flow context // illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(next)) if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar(); throw IllegalScalar();
// characters that might end the scalar // characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(next)) if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break; break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(next)) if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break; break;
if(leadingBlanks) { if(leadingBlanks) {
@ -409,12 +387,12 @@ namespace YAML
} }
// did we hit a non-blank character that ended us? // did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT.peek())) if(!Exp::BlankOrBreak.Matches(INPUT))
break; break;
// now eat blanks // now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT.peek())) { while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT.peek())) { if(Exp::Blank.Matches(INPUT)) {
if(leadingBlanks && m_column <= m_indents.top()) if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar(); throw IllegalTabInScalar();

View file

@ -71,8 +71,6 @@ namespace YAML
private: private:
char GetChar(); char GetChar();
void Eat(int n = 1); void Eat(int n = 1);
std::string Peek(int n);
void EatLineBreak(); void EatLineBreak();
bool IsWhitespaceToBeEaten(char ch); bool IsWhitespaceToBeEaten(char ch);