Added structure for Regular Expressions, saving work as a backup because of software upgrade.
This commit is contained in:
@@ -4,7 +4,7 @@ project(Kraken)
|
|||||||
|
|
||||||
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
|
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
|
||||||
|
|
||||||
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp )
|
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp )
|
||||||
|
|
||||||
include_directories( ${MY_INCLUDES} )
|
include_directories( ${MY_INCLUDES} )
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "StringReader.h"
|
#include "StringReader.h"
|
||||||
|
#include "RegEx.h"
|
||||||
#include "Symbol.h"
|
#include "Symbol.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
@@ -12,9 +13,12 @@ class Lexer {
|
|||||||
Lexer();
|
Lexer();
|
||||||
Lexer(std::string inputString);
|
Lexer(std::string inputString);
|
||||||
~Lexer();
|
~Lexer();
|
||||||
|
void addRegexString(std::string regExString);
|
||||||
void setInput(std::string inputString);
|
void setInput(std::string inputString);
|
||||||
Symbol* next();
|
Symbol* next();
|
||||||
private:
|
private:
|
||||||
StringReader reader;
|
std::vector<RegEx*> regExs;
|
||||||
|
std::string input;
|
||||||
|
int currentPosition;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
23
include/RegEx.h
Normal file
23
include/RegEx.h
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#ifndef REGEX_H
|
||||||
|
#define REGEX_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "RegExState.h"
|
||||||
|
#include "Symbol.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
class RegEx {
|
||||||
|
public:
|
||||||
|
RegEx();
|
||||||
|
RegEx(std::string inPattern);
|
||||||
|
~RegEx();
|
||||||
|
|
||||||
|
int longMatch(std::string stringToMatch);
|
||||||
|
std::string getPattern();
|
||||||
|
private:
|
||||||
|
std::string pattern;
|
||||||
|
RegExState* begin;
|
||||||
|
std::vector<RegExState*> currentStates;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
27
include/RegExState.h
Normal file
27
include/RegExState.h
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#ifndef REGEXSTATE_H
|
||||||
|
#define REGEXSTATE_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "Symbol.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/**
 * One node of a regular-expression state graph.
 *
 * A state either carries a single character to compare against or wraps an
 * inner state, and holds the list of states that may follow it.
 */
class RegExState {
    public:
        /** Creates a state that wraps inInnerState. */
        RegExState(RegExState* inInnerState);
        /** Creates a state that matches exactly inCharacter. */
        RegExState(char inCharacter);

        ~RegExState();

        /** Appends nextState to the list of successor states. */
        void addNext(RegExState* nextState);

        /** True when this state's character equals inCharacter. */
        bool characterIs(char inCharacter);

        /**
         * Returns a newly allocated vector holding every successor whose
         * character equals advanceCharacter. The caller must delete it.
         */
        std::vector<RegExState*>* advance(char advanceCharacter);

        /** True when there is no inner state and no successor state. */
        bool isGoal();

    private:
        std::vector<RegExState*> nextStates; // successor states
        RegExState* inner;                   // wrapped state, if any
        char character;                      // character this state matches
};
|
||||||
|
#endif
|
||||||
@@ -16,6 +16,7 @@ class NodeTree;
|
|||||||
class Symbol {
|
class Symbol {
|
||||||
public:
|
public:
|
||||||
Symbol(std::string name, bool isTerminal);
|
Symbol(std::string name, bool isTerminal);
|
||||||
|
Symbol(std::string name, bool isTerminal, std::string value);
|
||||||
Symbol(std::string name, bool isTerminal, NodeTree* tree);
|
Symbol(std::string name, bool isTerminal, NodeTree* tree);
|
||||||
~Symbol();
|
~Symbol();
|
||||||
bool const operator==(const Symbol &other);
|
bool const operator==(const Symbol &other);
|
||||||
@@ -30,8 +31,6 @@ class Symbol {
|
|||||||
std::string value;
|
std::string value;
|
||||||
bool terminal;
|
bool terminal;
|
||||||
NodeTree* subTree;
|
NodeTree* subTree;
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -5,7 +5,8 @@ Lexer::Lexer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Creates a lexer over inputString with the read cursor at its first
// character.
Lexer::Lexer(std::string inputString)
    : input(inputString), currentPosition(0) {
}
|
||||||
|
|
||||||
Lexer::~Lexer() {
|
Lexer::~Lexer() {
|
||||||
@@ -13,12 +14,24 @@ Lexer::~Lexer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Lexer::setInput(std::string inputString) {
|
void Lexer::setInput(std::string inputString) {
|
||||||
reader.setString(inputString);
|
input = inputString;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Lexer::addRegexString(std::string regExString) {
|
||||||
|
regExs.push_back(new RegEx(regExString));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the next token of the input as a newly allocated Symbol.
//
// Every registered RegEx is tried against the unread part of the input and
// the one reporting the longest match wins; on a tie the pattern registered
// first is kept. The value returned by RegEx::longMatch is treated as the
// number of characters consumed.
//
// Returns:
//   - a terminal Symbol named after the winning pattern, and advances the
//     read cursor past the match;
//   - a non-terminal "$EOF$" Symbol once the input is exhausted;
//   - NULL when input remains but no registered pattern matches it (the
//     cursor is left untouched). Callers must check for this.
Symbol* Lexer::next() {
    // Past-the-end check first: substr() throws std::out_of_range when the
    // start position exceeds the string length.
    if (currentPosition < 0 ||
            static_cast<std::string::size_type>(currentPosition) >= input.length())
        return new Symbol("$EOF$", false);

    // substr's second argument is a count, not an end index; omitting it
    // takes everything up to the end of the input.
    std::string remainingString = input.substr(currentPosition);

    int longestMatch = 0;
    RegEx* longestRegEx = NULL;
    for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
        int currentMatch = regExs[i]->longMatch(remainingString);
        if (currentMatch > longestMatch) {
            longestMatch = currentMatch;
            longestRegEx = regExs[i];
        }
    }

    // Nothing matched: report it instead of dereferencing a null pattern.
    if (longestRegEx == NULL)
        return NULL;

    currentPosition += longestMatch;
    return new Symbol(longestRegEx->getPattern(), true);
}
|
||||||
60
src/RegEx.cpp
Normal file
60
src/RegEx.cpp
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
#include "RegEx.h"
|
||||||
|
|
||||||
|
// Builds a linear chain of states, one per character of inPattern, with
// `begin` holding the first character and each state linked to the next.
//
// An empty pattern makes pattern.at(0) throw std::out_of_range.
RegEx::RegEx(std::string inPattern) : pattern(inPattern) {
    begin = new RegExState(pattern.at(0));

    RegExState* tail = begin;
    for (std::string::size_type idx = 1; idx < pattern.length(); ++idx) {
        RegExState* link = new RegExState(pattern.at(idx));
        tail->addNext(link);
        tail = link;
    }
}
|
||||||
|
|
||||||
|
RegEx::~RegEx() {
    // Intentionally empty. Note that the RegExState objects allocated with
    // `new` in the constructor are not released here.
}
|
||||||
|
|
||||||
|
int RegEx::longMatch(std::string stringToMatch) {
|
||||||
|
int lastMatch = 0;
|
||||||
|
currentStates = *(begin->advance(stringToMatch.at(0)));
|
||||||
|
std::vector<RegExState*> nextStates;
|
||||||
|
|
||||||
|
for (int i = 1; i < stringToMatch.size(); i++) {
|
||||||
|
//Go through every current state. Check to see if it is goal, if so update last goal.
|
||||||
|
//Also, add each state's advance to nextStates
|
||||||
|
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||||
|
if (currentStates[j]->isGoal())
|
||||||
|
lastMatch = i-1;
|
||||||
|
std::vector<RegExState*>* addStates = currentStates[j]->advance(stringToMatch.at(i));
|
||||||
|
nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());
|
||||||
|
delete addStates;
|
||||||
|
}
|
||||||
|
//Now, clear our current states and add eaczh one of our addStates if it is not already in current states
|
||||||
|
currentStates.clear();
|
||||||
|
for (std::vector<RegExState*>::size_type j = 0; j < nextStates.size(); j++) {
|
||||||
|
bool inCurrStates = false;
|
||||||
|
for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++) {
|
||||||
|
if (nextStates[j] == currentStates[i])
|
||||||
|
inCurrStates = true;
|
||||||
|
}
|
||||||
|
if (!inCurrStates)
|
||||||
|
currentStates.push_back(nextStates[j]);
|
||||||
|
}
|
||||||
|
nextStates.clear();
|
||||||
|
//If we can't continue matching, just return our last matched
|
||||||
|
if (currentStates.size() == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
//Check to see if we match on the last character in the string
|
||||||
|
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||||
|
if (currentStates[j]->isGoal())
|
||||||
|
lastMatch = stringToMatch.size()-1;
|
||||||
|
}
|
||||||
|
return lastMatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string RegEx::getPattern() {
|
||||||
|
return pattern;
|
||||||
|
}
|
||||||
36
src/RegExState.cpp
Normal file
36
src/RegExState.cpp
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
#include "RegExState.h"
|
||||||
|
|
||||||
|
// Creates a state that wraps inInnerState.
//
// `character` is given a defined value ('\0') because characterIs() reads
// it on every state; leaving it unset would compare against an
// indeterminate value.
RegExState::RegExState(RegExState* inInnerState)
    : inner(inInnerState), character('\0') {
}
|
||||||
|
|
||||||
|
// Creates a state that matches exactly inCharacter.
//
// `inner` is explicitly set to NULL: isGoal() tests `inner == NULL`, and
// reading a pointer that was never initialised is undefined behaviour.
RegExState::RegExState(char inCharacter)
    : inner(NULL), character(inCharacter) {
}
|
||||||
|
|
||||||
|
RegExState::~RegExState() {
    // Intentionally empty: the states referenced by nextStates and inner
    // are not deleted here.
}
|
||||||
|
|
||||||
|
// Registers nextState as a successor of this state (appended last).
void RegExState::addNext(RegExState* nextState) {
    nextStates.insert(nextStates.end(), nextState);
}
|
||||||
|
|
||||||
|
bool RegExState::characterIs(char inCharacter) {
|
||||||
|
return character == inCharacter;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<RegExState*>* RegExState::advance(char advanceCharacter) {
|
||||||
|
std::vector<RegExState*>* advanceStates = new std::vector<RegExState*>();
|
||||||
|
for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++) {
|
||||||
|
if (nextStates[i]->characterIs(advanceCharacter))
|
||||||
|
advanceStates->push_back(nextStates[i]);
|
||||||
|
}
|
||||||
|
return advanceStates;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RegExState::isGoal() {
|
||||||
|
return inner == NULL && nextStates.size() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -7,6 +7,13 @@ Symbol::Symbol(std::string name, bool isTerminal) {
|
|||||||
value = "HAHAHA VALUE";
|
value = "HAHAHA VALUE";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a symbol with an explicit value and no parse subtree.
Symbol::Symbol(std::string name, bool isTerminal, std::string value) {
    subTree = NULL;
    terminal = isTerminal;
    // The parameters shadow the members of the same name, hence this->.
    this->value = value;
    this->name = name;
}
|
||||||
|
|
||||||
Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) {
|
Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) {
|
||||||
this->name = name;
|
this->name = name;
|
||||||
this->terminal = isTerminal;
|
this->terminal = isTerminal;
|
||||||
|
|||||||
Reference in New Issue
Block a user