mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 08:21:29 +12:00
Added stream input to the regular expressions, greatly simplifying the usage (in particular, we no longer have to specify the number of characters to be checked).
This commit is contained in:
parent
4e435b1321
commit
de29068110
4 changed files with 232 additions and 85 deletions
208
regex.cpp
208
regex.cpp
|
@ -2,30 +2,60 @@
|
||||||
|
|
||||||
namespace YAML
|
namespace YAML
|
||||||
{
|
{
|
||||||
RegEx::RegEx(REGEX_OP op): m_op(op)
|
RegEx::RegEx(REGEX_OP op): m_op(op), m_pOp(0)
|
||||||
{
|
{
|
||||||
|
SetOp();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::RegEx(): m_op(REGEX_EMPTY)
|
RegEx::RegEx(const RegEx& rhs): m_pOp(0)
|
||||||
{
|
{
|
||||||
|
m_op = rhs.m_op;
|
||||||
|
m_a = rhs.m_a;
|
||||||
|
m_z = rhs.m_z;
|
||||||
|
m_params = rhs.m_params;
|
||||||
|
|
||||||
|
SetOp();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_a(ch)
|
RegEx::RegEx(): m_op(REGEX_EMPTY), m_pOp(0)
|
||||||
{
|
{
|
||||||
|
SetOp();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_a(a), m_z(z)
|
RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_pOp(0), m_a(ch)
|
||||||
{
|
{
|
||||||
|
SetOp();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op)
|
RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_pOp(0), m_a(a), m_z(z)
|
||||||
|
{
|
||||||
|
SetOp();
|
||||||
|
}
|
||||||
|
|
||||||
|
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0)
|
||||||
{
|
{
|
||||||
for(unsigned i=0;i<str.size();i++)
|
for(unsigned i=0;i<str.size();i++)
|
||||||
m_params.push_back(RegEx(str[0]));
|
m_params.push_back(RegEx(str[0]));
|
||||||
|
|
||||||
|
SetOp();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::~RegEx()
|
RegEx::~RegEx()
|
||||||
{
|
{
|
||||||
|
delete m_pOp;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegEx::SetOp()
|
||||||
|
{
|
||||||
|
delete m_pOp;
|
||||||
|
m_pOp = 0;
|
||||||
|
switch(m_op) {
|
||||||
|
case REGEX_MATCH: m_pOp = new MatchOperator; break;
|
||||||
|
case REGEX_RANGE: m_pOp = new RangeOperator; break;
|
||||||
|
case REGEX_OR: m_pOp = new OrOperator; break;
|
||||||
|
case REGEX_NOT: m_pOp = new NotOperator; break;
|
||||||
|
case REGEX_SEQ: m_pOp = new SeqOperator; break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RegEx::Matches(char ch) const
|
bool RegEx::Matches(char ch) const
|
||||||
|
@ -40,6 +70,11 @@ namespace YAML
|
||||||
return Match(str) >= 0;
|
return Match(str) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RegEx::Matches(std::istream& in) const
|
||||||
|
{
|
||||||
|
return Match(in) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Match
|
// Match
|
||||||
// . Matches the given string against this regular expression.
|
// . Matches the given string against this regular expression.
|
||||||
// . Returns the number of characters matched.
|
// . Returns the number of characters matched.
|
||||||
|
@ -49,44 +84,36 @@ namespace YAML
|
||||||
// but that of course matches zero characters).
|
// but that of course matches zero characters).
|
||||||
int RegEx::Match(const std::string& str) const
|
int RegEx::Match(const std::string& str) const
|
||||||
{
|
{
|
||||||
switch(m_op) {
|
if(!m_pOp)
|
||||||
case REGEX_EMPTY:
|
|
||||||
if(str.empty())
|
|
||||||
return 0;
|
|
||||||
return -1;
|
return -1;
|
||||||
case REGEX_MATCH:
|
|
||||||
if(str.empty() || str[0] != m_a)
|
return m_pOp->Match(str, *this);
|
||||||
return -1;
|
|
||||||
return 1;
|
//case REGEX_EMPTY:
|
||||||
case REGEX_RANGE:
|
// if(str.empty())
|
||||||
if(str.empty() || m_a > str[0] || m_z < str[0])
|
// return 0;
|
||||||
return -1;
|
// return -1;
|
||||||
return 1;
|
|
||||||
case REGEX_NOT:
|
|
||||||
if(m_params.empty())
|
|
||||||
return false;
|
|
||||||
if(m_params[0].Match(str) >= 0)
|
|
||||||
return -1;
|
|
||||||
return 1;
|
|
||||||
case REGEX_OR:
|
|
||||||
for(unsigned i=0;i<m_params.size();i++) {
|
|
||||||
int n = m_params[i].Match(str);
|
|
||||||
if(n >= 0)
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
case REGEX_SEQ:
|
|
||||||
int offset = 0;
|
|
||||||
for(unsigned i=0;i<m_params.size();i++) {
|
|
||||||
int n = m_params[i].Match(str.substr(offset));
|
|
||||||
if(n == -1)
|
|
||||||
return -1;
|
|
||||||
offset += n;
|
|
||||||
}
|
|
||||||
return offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Match
|
||||||
|
// . The stream version does the same thing as the string version;
|
||||||
|
// REMEMBER that we only match from the stream's current read position!
|
||||||
|
// . Note: the istream is not a const reference, but we guarantee
|
||||||
|
// that its read position is restored to where it started, and that
|
||||||
|
// its error flags are cleared, before we return.
|
||||||
|
int RegEx::Match(std::istream& in) const
|
||||||
|
{
|
||||||
|
if(!m_pOp)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
int pos = in.tellg();
|
||||||
|
int ret = m_pOp->Match(in, *this);
|
||||||
|
|
||||||
|
// reset input stream!
|
||||||
|
in.clear();
|
||||||
|
in.seekg(pos);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx operator ! (const RegEx& ex)
|
RegEx operator ! (const RegEx& ex)
|
||||||
|
@ -111,4 +138,107 @@ namespace YAML
|
||||||
ret.m_params.push_back(ex2);
|
ret.m_params.push_back(ex2);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Operators
|
||||||
|
|
||||||
|
// MatchOperator
|
||||||
|
int RegEx::MatchOperator::Match(const std::string& str, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
if(str.empty() || str[0] != regex.m_a)
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int RegEx::MatchOperator::Match(std::istream& in, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
if(!in || in.peek() != regex.m_a)
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// RangeOperator
|
||||||
|
int RegEx::RangeOperator::Match(const std::string& str, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
if(str.empty() || regex.m_a > str[0] || regex.m_z < str[0])
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int RegEx::RangeOperator::Match(std::istream& in, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
if(!in || regex.m_a > in.peek() || regex.m_z < in.peek())
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// OrOperator
|
||||||
|
int RegEx::OrOperator::Match(const std::string& str, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
for(unsigned i=0;i<regex.m_params.size();i++) {
|
||||||
|
int n = regex.m_params[i].Match(str);
|
||||||
|
if(n >= 0)
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int RegEx::OrOperator::Match(std::istream& in, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
for(unsigned i=0;i<regex.m_params.size();i++) {
|
||||||
|
int n = regex.m_params[i].Match(in);
|
||||||
|
if(n >= 0)
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NotOperator
|
||||||
|
int RegEx::NotOperator::Match(const std::string& str, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
if(regex.m_params.empty())
|
||||||
|
return -1;
|
||||||
|
if(regex.m_params[0].Match(str) >= 0)
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int RegEx::NotOperator::Match(std::istream& in, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
if(regex.m_params.empty())
|
||||||
|
return -1;
|
||||||
|
if(regex.m_params[0].Match(in) >= 0)
|
||||||
|
return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// SeqOperator
|
||||||
|
int RegEx::SeqOperator::Match(const std::string& str, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
int offset = 0;
|
||||||
|
for(unsigned i=0;i<regex.m_params.size();i++) {
|
||||||
|
int n = regex.m_params[i].Match(str.substr(offset));
|
||||||
|
if(n == -1)
|
||||||
|
return -1;
|
||||||
|
offset += n;
|
||||||
|
}
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
int RegEx::SeqOperator::Match(std::istream& in, const RegEx& regex) const
|
||||||
|
{
|
||||||
|
int offset = 0;
|
||||||
|
for(unsigned i=0;i<regex.m_params.size();i++) {
|
||||||
|
int n = regex.m_params[i].Match(in);
|
||||||
|
if(n == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
offset += n;
|
||||||
|
in.seekg(n, std::ios_base::cur);
|
||||||
|
}
|
||||||
|
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
43
regex.h
43
regex.h
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <ios>
|
||||||
|
|
||||||
namespace YAML
|
namespace YAML
|
||||||
{
|
{
|
||||||
|
@ -10,17 +11,55 @@ namespace YAML
|
||||||
// simplified regular expressions
|
// simplified regular expressions
|
||||||
// . Only straightforward matches (no repeated characters)
|
// . Only straightforward matches (no repeated characters)
|
||||||
// . Only matches from start of string
|
// . Only matches from start of string
|
||||||
class RegEx {
|
class RegEx
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
struct Operator {
|
||||||
|
virtual ~Operator() {}
|
||||||
|
virtual int Match(const std::string& str, const RegEx& regex) const = 0;
|
||||||
|
virtual int Match(std::istream& in, const RegEx& regex) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MatchOperator: public Operator {
|
||||||
|
virtual int Match(const std::string& str, const RegEx& regex) const;
|
||||||
|
virtual int Match(std::istream& in, const RegEx& regex) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct RangeOperator: public Operator {
|
||||||
|
virtual int Match(const std::string& str, const RegEx& regex) const;
|
||||||
|
virtual int Match(std::istream& in, const RegEx& regex) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct OrOperator: public Operator {
|
||||||
|
virtual int Match(const std::string& str, const RegEx& regex) const;
|
||||||
|
virtual int Match(std::istream& in, const RegEx& regex) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct NotOperator: public Operator {
|
||||||
|
virtual int Match(const std::string& str, const RegEx& regex) const;
|
||||||
|
virtual int Match(std::istream& in, const RegEx& regex) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SeqOperator: public Operator {
|
||||||
|
virtual int Match(const std::string& str, const RegEx& regex) const;
|
||||||
|
virtual int Match(std::istream& in, const RegEx& regex) const;
|
||||||
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
friend struct Operator;
|
||||||
|
|
||||||
RegEx();
|
RegEx();
|
||||||
RegEx(char ch);
|
RegEx(char ch);
|
||||||
RegEx(char a, char z);
|
RegEx(char a, char z);
|
||||||
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
|
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
|
||||||
|
RegEx(const RegEx& rhs);
|
||||||
~RegEx();
|
~RegEx();
|
||||||
|
|
||||||
bool Matches(char ch) const;
|
bool Matches(char ch) const;
|
||||||
bool Matches(const std::string& str) const;
|
bool Matches(const std::string& str) const;
|
||||||
|
bool Matches(std::istream& in) const;
|
||||||
int Match(const std::string& str) const;
|
int Match(const std::string& str) const;
|
||||||
|
int Match(std::istream& in) const;
|
||||||
|
|
||||||
friend RegEx operator ! (const RegEx& ex);
|
friend RegEx operator ! (const RegEx& ex);
|
||||||
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2);
|
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2);
|
||||||
|
@ -28,9 +67,11 @@ namespace YAML
|
||||||
|
|
||||||
private:
|
private:
|
||||||
RegEx(REGEX_OP op);
|
RegEx(REGEX_OP op);
|
||||||
|
void SetOp();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
REGEX_OP m_op;
|
REGEX_OP m_op;
|
||||||
|
Operator *m_pOp;
|
||||||
char m_a, m_z;
|
char m_a, m_z;
|
||||||
std::vector <RegEx> m_params;
|
std::vector <RegEx> m_params;
|
||||||
};
|
};
|
||||||
|
|
56
scanner.cpp
56
scanner.cpp
|
@ -48,22 +48,6 @@ namespace YAML
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Peek
|
|
||||||
// . Peeks at the next 'n' characters and returns them in a string.
|
|
||||||
std::string Scanner::Peek(int n)
|
|
||||||
{
|
|
||||||
std::string ret;
|
|
||||||
|
|
||||||
int pos = INPUT.tellg();
|
|
||||||
for(int i=0;i<n;i++)
|
|
||||||
ret += INPUT.get();
|
|
||||||
|
|
||||||
INPUT.clear();
|
|
||||||
INPUT.seekg(pos);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
// EatLineBreak
|
// EatLineBreak
|
||||||
// . Eats with no checking
|
// . Eats with no checking
|
||||||
void Scanner::EatLineBreak()
|
void Scanner::EatLineBreak()
|
||||||
|
@ -97,7 +81,7 @@ namespace YAML
|
||||||
if(m_column != 0)
|
if(m_column != 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return Exp::DocStart.Matches(Peek(4));
|
return Exp::DocStart.Matches(INPUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsDocumentEnd
|
// IsDocumentEnd
|
||||||
|
@ -107,41 +91,37 @@ namespace YAML
|
||||||
if(m_column != 0)
|
if(m_column != 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return Exp::DocEnd.Matches(Peek(4));
|
return Exp::DocEnd.Matches(INPUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsBlockEntry
|
// IsBlockEntry
|
||||||
bool Scanner::IsBlockEntry()
|
bool Scanner::IsBlockEntry()
|
||||||
{
|
{
|
||||||
return Exp::BlockEntry.Matches(Peek(2));
|
return Exp::BlockEntry.Matches(INPUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsKey
|
// IsKey
|
||||||
bool Scanner::IsKey()
|
bool Scanner::IsKey()
|
||||||
{
|
{
|
||||||
std::string next = Peek(2);
|
|
||||||
if(m_flowLevel > 0)
|
if(m_flowLevel > 0)
|
||||||
return Exp::KeyInFlow.Matches(next);
|
return Exp::KeyInFlow.Matches(INPUT);
|
||||||
return Exp::Key.Matches(next);
|
return Exp::Key.Matches(INPUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValue
|
// IsValue
|
||||||
bool Scanner::IsValue()
|
bool Scanner::IsValue()
|
||||||
{
|
{
|
||||||
std::string next = Peek(2);
|
|
||||||
if(m_flowLevel > 0)
|
if(m_flowLevel > 0)
|
||||||
return Exp::ValueInFlow.Matches(next);
|
return Exp::ValueInFlow.Matches(INPUT);
|
||||||
return Exp::Value.Matches(next);
|
return Exp::Value.Matches(INPUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsPlainScalar
|
// IsPlainScalar
|
||||||
// . Rules:
|
|
||||||
bool Scanner::IsPlainScalar()
|
bool Scanner::IsPlainScalar()
|
||||||
{
|
{
|
||||||
std::string next = Peek(2);
|
|
||||||
if(m_flowLevel > 0)
|
if(m_flowLevel > 0)
|
||||||
return Exp::PlainScalarInFlow.Matches(next);
|
return Exp::PlainScalarInFlow.Matches(INPUT);
|
||||||
return Exp::PlainScalar.Matches(next);
|
return Exp::PlainScalar.Matches(INPUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
@ -368,21 +348,19 @@ namespace YAML
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// comment
|
// comment
|
||||||
if(Exp::Comment.Matches(INPUT.peek()))
|
if(Exp::Comment.Matches(INPUT))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// first eat non-blanks
|
// first eat non-blanks
|
||||||
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT.peek())) {
|
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
|
||||||
std::string next = Peek(2);
|
|
||||||
|
|
||||||
// illegal colon in flow context
|
// illegal colon in flow context
|
||||||
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(next))
|
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
|
||||||
throw IllegalScalar();
|
throw IllegalScalar();
|
||||||
|
|
||||||
// characters that might end the scalar
|
// characters that might end the scalar
|
||||||
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(next))
|
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
|
||||||
break;
|
break;
|
||||||
if(m_flowLevel == 0 && Exp::EndScalar.Matches(next))
|
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if(leadingBlanks) {
|
if(leadingBlanks) {
|
||||||
|
@ -409,12 +387,12 @@ namespace YAML
|
||||||
}
|
}
|
||||||
|
|
||||||
// did we hit a non-blank character that ended us?
|
// did we hit a non-blank character that ended us?
|
||||||
if(!Exp::BlankOrBreak.Matches(INPUT.peek()))
|
if(!Exp::BlankOrBreak.Matches(INPUT))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// now eat blanks
|
// now eat blanks
|
||||||
while(INPUT && Exp::BlankOrBreak.Matches(INPUT.peek())) {
|
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
|
||||||
if(Exp::Blank.Matches(INPUT.peek())) {
|
if(Exp::Blank.Matches(INPUT)) {
|
||||||
if(leadingBlanks && m_column <= m_indents.top())
|
if(leadingBlanks && m_column <= m_indents.top())
|
||||||
throw IllegalTabInScalar();
|
throw IllegalTabInScalar();
|
||||||
|
|
||||||
|
|
|
@ -71,8 +71,6 @@ namespace YAML
|
||||||
private:
|
private:
|
||||||
char GetChar();
|
char GetChar();
|
||||||
void Eat(int n = 1);
|
void Eat(int n = 1);
|
||||||
std::string Peek(int n);
|
|
||||||
|
|
||||||
void EatLineBreak();
|
void EatLineBreak();
|
||||||
|
|
||||||
bool IsWhitespaceToBeEaten(char ch);
|
bool IsWhitespaceToBeEaten(char ch);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue