Added structure for Regular Expressions, saving work as a backup because of software upgrade.

This commit is contained in:
Nathan Braswell
2013-07-01 22:45:33 -04:00
parent c2520ec2c4
commit 94a7739bd9
9 changed files with 179 additions and 10 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp ) set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp )
include_directories( ${MY_INCLUDES} ) include_directories( ${MY_INCLUDES} )

View File

@@ -3,6 +3,7 @@
#include "util.h" #include "util.h"
#include "StringReader.h" #include "StringReader.h"
#include "RegEx.h"
#include "Symbol.h" #include "Symbol.h"
#include <string> #include <string>
@@ -12,9 +13,12 @@ class Lexer {
Lexer(); Lexer();
Lexer(std::string inputString); Lexer(std::string inputString);
~Lexer(); ~Lexer();
void addRegexString(std::string regExString);
void setInput(std::string inputString); void setInput(std::string inputString);
Symbol* next(); Symbol* next();
private: private:
StringReader reader; std::vector<RegEx*> regExs;
std::string input;
int currentPosition;
}; };
#endif #endif

23
include/RegEx.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef REGEX_H
#define REGEX_H
#include "util.h"
#include "RegExState.h"
#include "Symbol.h"
#include <string>
// A matcher built from a pattern string. The pattern is turned into a chain of
// RegExState objects, and longMatch steps a set of active states over the
// input one character at a time.
class RegEx {
public:
// Default constructor.
// NOTE(review): no definition is visible alongside the other members — confirm one exists before using it.
RegEx();
// Stores inPattern and builds one RegExState per pattern character, linked in order.
RegEx(std::string inPattern);
// Destructor; its body is empty, so the states created by the constructor are not released by it.
~RegEx();
// Runs the states over stringToMatch and returns an int describing how far the
// furthest match reached; 0 when no goal state was reached.
int longMatch(std::string stringToMatch);
// Returns a copy of the pattern string given to the constructor.
std::string getPattern();
private:
// Pattern text exactly as passed to the constructor.
std::string pattern;
// Entry state of the chain built by the constructor; longMatch starts from it.
RegExState* begin;
// Working set of active states; overwritten on every longMatch call.
std::vector<RegExState*> currentStates;
};
#endif

27
include/RegExState.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef REGEXSTATE_H
#define REGEXSTATE_H
#include "util.h"
#include "Symbol.h"
#include <string>
#include <vector>
// One node of the regular-expression state graph. A state is constructed
// either from a single character or from an inner state, and keeps a list of
// the states that may follow it.
class RegExState {
public:
// Creates a state that stores inInnerState as its inner state.
RegExState(RegExState* inInnerState);
// Creates a state that stores inCharacter as its character.
RegExState(char inCharacter);
// Destructor; its body is empty, so no successor or inner state is deleted.
~RegExState();
// Appends nextState to this state's list of successors.
void addNext(RegExState* nextState);
// Returns true when this state's character equals inCharacter.
bool characterIs(char inCharacter);
// Returns a vector allocated with new that holds every successor whose
// character equals advanceCharacter. The caller is responsible for deleting it.
std::vector<RegExState*>* advance(char advanceCharacter);
// Returns true when the state has no inner state and no successors.
bool isGoal();
private:
// Successor states, in the order they were added with addNext.
std::vector<RegExState*> nextStates;
// Inner state supplied to the RegExState* constructor.
RegExState* inner;
// Character supplied to the char constructor.
char character;
};
#endif

View File

@@ -16,6 +16,7 @@ class NodeTree;
class Symbol { class Symbol {
public: public:
Symbol(std::string name, bool isTerminal); Symbol(std::string name, bool isTerminal);
Symbol(std::string name, bool isTerminal, std::string value);
Symbol(std::string name, bool isTerminal, NodeTree* tree); Symbol(std::string name, bool isTerminal, NodeTree* tree);
~Symbol(); ~Symbol();
bool const operator==(const Symbol &other); bool const operator==(const Symbol &other);
@@ -30,8 +31,6 @@ class Symbol {
std::string value; std::string value;
bool terminal; bool terminal;
NodeTree* subTree; NodeTree* subTree;
}; };
#endif #endif

View File

@@ -5,7 +5,8 @@ Lexer::Lexer() {
} }
Lexer::Lexer(std::string inputString) { Lexer::Lexer(std::string inputString) {
reader.setString(inputString); input = inputString;
currentPosition = 0;
} }
Lexer::~Lexer() { Lexer::~Lexer() {
@@ -13,12 +14,24 @@ Lexer::~Lexer() {
} }
void Lexer::setInput(std::string inputString) { void Lexer::setInput(std::string inputString) {
reader.setString(inputString); input = inputString;
}
void Lexer::addRegexString(std::string regExString) {
regExs.push_back(new RegEx(regExString));
} }
Symbol* Lexer::next() { Symbol* Lexer::next() {
std::string token = reader.word(); int longestMatch = 0;
if (token != "") RegEx * longestRegEx = NULL;
return new Symbol("\""+token+"\"", true); std::string remainingString = input.substr(currentPosition,input.length()-1);
return new Symbol("$EOF$", false); for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
int currentMatch = regExs[i]->longMatch(remainingString);
if (currentMatch > longestMatch) {
longestMatch = currentMatch;
longestRegEx = regExs[i];
}
}
currentPosition += longestMatch;
return new Symbol(longestRegEx->getPattern(), true);
} }

60
src/RegEx.cpp Normal file
View File

@@ -0,0 +1,60 @@
#include "RegEx.h"
// Builds the state chain for inPattern.
//
// The chain begins with a character-less start state, followed by one
// RegExState per pattern character, each linked to the next with addNext.
// longMatch only ever tests the *successors* of `begin` against the first
// input character, so `begin` must not itself stand for a pattern character;
// otherwise the first character of the pattern would never be compared with
// anything. An empty pattern yields just the start state (no std::out_of_range
// from at(0)) and therefore never matches.
//
// The states are allocated with new and are not released by this class.
RegEx::RegEx(std::string inPattern) {
    pattern = inPattern;

    // Typed null so the RegExState* overload is chosen unambiguously over the
    // char overload.
    RegExState* const noInnerState = NULL;
    begin = new RegExState(noInnerState);

    RegExState* tail = begin;
    for (std::string::size_type i = 0; i < pattern.length(); i++) {
        RegExState* next = new RegExState(pattern.at(i));
        tail->addNext(next);
        tail = next;
    }
}
// Destructor. Releases nothing.
RegEx::~RegEx() {
// NOTE(review): the constructor allocates its RegExState objects with new and nothing
// deletes them here — confirm whether leaking them is acceptable or cleanup is still to be written.
}
int RegEx::longMatch(std::string stringToMatch) {
int lastMatch = 0;
currentStates = *(begin->advance(stringToMatch.at(0)));
std::vector<RegExState*> nextStates;
for (int i = 1; i < stringToMatch.size(); i++) {
//Go through every current state. Check to see if it is goal, if so update last goal.
//Also, add each state's advance to nextStates
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
if (currentStates[j]->isGoal())
lastMatch = i-1;
std::vector<RegExState*>* addStates = currentStates[j]->advance(stringToMatch.at(i));
nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());
delete addStates;
}
//Now, clear our current states and add eaczh one of our addStates if it is not already in current states
currentStates.clear();
for (std::vector<RegExState*>::size_type j = 0; j < nextStates.size(); j++) {
bool inCurrStates = false;
for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++) {
if (nextStates[j] == currentStates[i])
inCurrStates = true;
}
if (!inCurrStates)
currentStates.push_back(nextStates[j]);
}
nextStates.clear();
//If we can't continue matching, just return our last matched
if (currentStates.size() == 0)
break;
}
//Check to see if we match on the last character in the string
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
if (currentStates[j]->isGoal())
lastMatch = stringToMatch.size()-1;
}
return lastMatch;
}
// Returns a copy of the pattern text this expression was built from.
std::string RegEx::getPattern() {
    std::string patternCopy(pattern);
    return patternCopy;
}

36
src/RegExState.cpp Normal file
View File

@@ -0,0 +1,36 @@
#include "RegExState.h"
// Creates a state that wraps inInnerState.
// `character` is set to '\0' so that characterIs() never reads an
// uninitialized member on a state built through this constructor.
RegExState::RegExState(RegExState* inInnerState)
    : inner(inInnerState), character('\0') {
}
// Creates a state that stands for the single character inCharacter.
// `inner` is explicitly set to NULL: isGoal() compares it against NULL, so
// leaving it uninitialized would make that check read an indeterminate pointer.
RegExState::RegExState(char inCharacter)
    : inner(NULL), character(inCharacter) {
}
// Destructor. The pointers held in nextStates and inner are not deleted here.
RegExState::~RegExState() {
//No cleanup necessary
}
// Registers nextState as a possible successor of this state, after any
// successors added earlier.
void RegExState::addNext(RegExState* nextState) {
    nextStates.insert(nextStates.end(), nextState);
}
// Reports whether inCharacter is the character stored in this state.
bool RegExState::characterIs(char inCharacter) {
    const bool sameCharacter = (inCharacter == character);
    return sameCharacter;
}
std::vector<RegExState*>* RegExState::advance(char advanceCharacter) {
std::vector<RegExState*>* advanceStates = new std::vector<RegExState*>();
for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++) {
if (nextStates[i]->characterIs(advanceCharacter))
advanceStates->push_back(nextStates[i]);
}
return advanceStates;
}
// A state is a goal when it has no inner state and nothing can follow it.
bool RegExState::isGoal() {
    if (inner != NULL)
        return false;
    return nextStates.empty();
}

View File

@@ -7,6 +7,13 @@ Symbol::Symbol(std::string name, bool isTerminal) {
value = "HAHAHA VALUE"; value = "HAHAHA VALUE";
} }
Symbol::Symbol(std::string name, bool isTerminal, std::string value) {
this->name = name;
this->terminal = isTerminal;
this->subTree = NULL;
this->value = value;
}
Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) { Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) {
this->name = name; this->name = name;
this->terminal = isTerminal; this->terminal = isTerminal;