Fixed a lot of bugs. Actually gets through the entire experimental grammar. (A largeish experimental grammar for Kraken, written to continue testing and to really start language development.)
This commit is contained in:
@@ -18,6 +18,7 @@ class ParseAction {
|
|||||||
ParseAction(ActionType action, ParseRule* reduceRule);
|
ParseAction(ActionType action, ParseRule* reduceRule);
|
||||||
ParseAction(ActionType action, int shiftState);
|
ParseAction(ActionType action, int shiftState);
|
||||||
~ParseAction();
|
~ParseAction();
|
||||||
|
bool const equalsExceptLookahead(const ParseAction &other);
|
||||||
bool const operator==(const ParseAction &other);
|
bool const operator==(const ParseAction &other);
|
||||||
bool const operator!=(const ParseAction &other);
|
bool const operator!=(const ParseAction &other);
|
||||||
std::string toString();
|
std::string toString();
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ class Parser {
|
|||||||
|
|
||||||
void loadGrammer(std::string grammerInputString);
|
void loadGrammer(std::string grammerInputString);
|
||||||
std::vector<Symbol*>* firstSet(Symbol* token);
|
std::vector<Symbol*>* firstSet(Symbol* token);
|
||||||
|
std::vector<Symbol*>* firstSet(Symbol* token, std::vector<Symbol*> &avoidList);
|
||||||
void printFirstSets();
|
void printFirstSets();
|
||||||
std::vector<Symbol*>* incrementiveFollowSet(ParseRule* rule);
|
std::vector<Symbol*>* incrementiveFollowSet(ParseRule* rule);
|
||||||
void createStateSet();
|
void createStateSet();
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ class RegEx {
|
|||||||
RegEx(std::string inPattern);
|
RegEx(std::string inPattern);
|
||||||
~RegEx();
|
~RegEx();
|
||||||
|
|
||||||
|
void construct();
|
||||||
|
void deperenthesize();
|
||||||
int longMatch(std::string stringToMatch);
|
int longMatch(std::string stringToMatch);
|
||||||
std::string getPattern();
|
std::string getPattern();
|
||||||
std::string toString();
|
std::string toString();
|
||||||
|
|||||||
7
main.cpp
7
main.cpp
@@ -45,18 +45,23 @@ int main(int argc, char* argv[]) {
|
|||||||
Parser parser;
|
Parser parser;
|
||||||
parser.loadGrammer(grammerInputFileString);
|
parser.loadGrammer(grammerInputFileString);
|
||||||
//std::cout << "Creating State Set from Main" << std::endl;
|
//std::cout << "Creating State Set from Main" << std::endl;
|
||||||
|
std::cout << "\n\n\n\n\n\n\n\n\n\nState Set" << std::endl;
|
||||||
parser.createStateSet();
|
parser.createStateSet();
|
||||||
//std::cout << "finished State Set from Main" << std::endl;
|
//std::cout << "finished State Set from Main" << std::endl;
|
||||||
//std::cout << "Doing stateSetToString from Main" << std::endl;
|
//std::cout << "Doing stateSetToString from Main" << std::endl;
|
||||||
|
std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
|
||||||
std::cout << parser.stateSetToString() << std::endl;
|
std::cout << parser.stateSetToString() << std::endl;
|
||||||
//std::cout << "finished stateSetToString from Main" << std::endl;
|
//std::cout << "finished stateSetToString from Main" << std::endl;
|
||||||
|
std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl;
|
||||||
std::cout << parser.tableToString() << std::endl;
|
std::cout << parser.tableToString() << std::endl;
|
||||||
|
std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl;
|
||||||
std::cout << grammerInputFileString << std::endl;
|
std::cout << grammerInputFileString << std::endl;
|
||||||
|
std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer toString" << std::endl;
|
||||||
std::cout << parser.grammerToString() << std::endl;
|
std::cout << parser.grammerToString() << std::endl;
|
||||||
//std::cout << parser.grammerToDOT() << std::endl;
|
//std::cout << parser.grammerToDOT() << std::endl;
|
||||||
|
|
||||||
//outFile << parser.grammerToDOT() << std::endl;
|
//outFile << parser.grammerToDOT() << std::endl;
|
||||||
|
std::cout << "\n\n\n\n\n\n\n\n\n\nParsing" << std::endl;
|
||||||
|
|
||||||
std::cout << programInputFileString << std::endl;
|
std::cout << programInputFileString << std::endl;
|
||||||
NodeTree* parseTree = parser.parseInput(programInputFileString);
|
NodeTree* parseTree = parser.parseInput(programInputFileString);
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ void Lexer::addRegEx(std::string regExString) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Symbol* Lexer::next() {
|
Symbol* Lexer::next() {
|
||||||
std::cout << "Current at is " << input.substr(currentPosition,input.length()-1) << " currentPos is " << currentPosition <<std::endl;
|
std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
|
||||||
//If we're at the end, return an eof
|
//If we're at the end, return an eof
|
||||||
if (currentPosition == input.length()-1)
|
if (currentPosition == input.length()-1)
|
||||||
return new Symbol("$EOF$", true);
|
return new Symbol("$EOF$", true);
|
||||||
@@ -31,7 +31,7 @@ Symbol* Lexer::next() {
|
|||||||
RegEx* longestRegEx = NULL;
|
RegEx* longestRegEx = NULL;
|
||||||
std::string remainingString = input.substr(currentPosition,input.length()-1);
|
std::string remainingString = input.substr(currentPosition,input.length()-1);
|
||||||
for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
|
for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
|
||||||
std::cout << "Trying regex " << regExs[i]->toString() << std::endl;
|
std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
|
||||||
int currentMatch = regExs[i]->longMatch(remainingString);
|
int currentMatch = regExs[i]->longMatch(remainingString);
|
||||||
if (currentMatch > longestMatch) {
|
if (currentMatch > longestMatch) {
|
||||||
longestMatch = currentMatch;
|
longestMatch = currentMatch;
|
||||||
@@ -40,11 +40,11 @@ Symbol* Lexer::next() {
|
|||||||
}
|
}
|
||||||
if (longestRegEx != NULL) {
|
if (longestRegEx != NULL) {
|
||||||
currentPosition += longestMatch + 1;
|
currentPosition += longestMatch + 1;
|
||||||
std::cout << "Current at is " << input.substr(currentPosition,input.length()-1) << " currentPos is " << currentPosition <<std::endl;
|
std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
|
||||||
return new Symbol(longestRegEx->getPattern(), true);
|
return new Symbol(longestRegEx->getPattern(), true);
|
||||||
} else {
|
} else {
|
||||||
std::cout << "Found no applicable regex" << std::endl;
|
std::cout << "Found no applicable regex" << std::endl;
|
||||||
std::cout << "Remaining is " << input.substr(currentPosition,input.length()-1) << std::endl;
|
std::cout << "Remaining is " << input.substr(currentPosition,input.length()-1) << std::endl;
|
||||||
return NULL;
|
return new Symbol("$NO_APPLICABLE_REGEX$", true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -22,6 +22,9 @@ ParseAction::~ParseAction() {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool ParseAction::equalsExceptLookahead(const ParseAction &other) {
|
||||||
|
return( action == other.action && ( reduceRule == other.reduceRule || reduceRule->equalsExceptLookahead(*(other.reduceRule)) ) && shiftState == other.shiftState);
|
||||||
|
}
|
||||||
|
|
||||||
const bool ParseAction::operator==(const ParseAction &other) {
|
const bool ParseAction::operator==(const ParseAction &other) {
|
||||||
return( action == other.action && ( reduceRule == other.reduceRule || *reduceRule == *(other.reduceRule) ) && shiftState == other.shiftState);
|
return( action == other.action && ( reduceRule == other.reduceRule || *reduceRule == *(other.reduceRule) ) && shiftState == other.shiftState);
|
||||||
|
|||||||
@@ -73,6 +73,21 @@ void Parser::loadGrammer(std::string grammerInputString) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
|
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
|
||||||
|
//std::cout << "Simple first set for " << token->toString() << std::endl;
|
||||||
|
std::vector<Symbol*> avoidList;
|
||||||
|
return firstSet(token, avoidList);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Symbol*>* Parser::firstSet(Symbol* token, std::vector<Symbol*> &avoidList) {
|
||||||
|
//If we've already done this token, don't do it again
|
||||||
|
for (std::vector<Symbol*>::size_type i = 0; i < avoidList.size(); i++)
|
||||||
|
if (*(avoidList[i]) == *token) {
|
||||||
|
return new std::vector<Symbol*>();
|
||||||
|
//std::cout << "Avoiding firstSet for " << token->toString() << std::endl;
|
||||||
|
}
|
||||||
|
avoidList.push_back(token);
|
||||||
|
//std::cout << "Cpx first set for " << token->toString() << std::endl;
|
||||||
|
//std::cout << "Doing first set for " << token->toString() << std::endl;
|
||||||
std::vector<Symbol*>* first = new std::vector<Symbol*>();
|
std::vector<Symbol*>* first = new std::vector<Symbol*>();
|
||||||
//First, if the symbol is a terminal, than it's first set is just itself.
|
//First, if the symbol is a terminal, than it's first set is just itself.
|
||||||
if (token->isTerminal()) {
|
if (token->isTerminal()) {
|
||||||
@@ -96,7 +111,7 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
|
|||||||
recursiveFirstSet->push_back(rightToken);
|
recursiveFirstSet->push_back(rightToken);
|
||||||
} else {
|
} else {
|
||||||
//Add the entire set
|
//Add the entire set
|
||||||
recursiveFirstSet = firstSet(rightToken);
|
recursiveFirstSet = firstSet(rightToken, avoidList);
|
||||||
}
|
}
|
||||||
first->insert(first->end(), recursiveFirstSet->begin(), recursiveFirstSet->end());
|
first->insert(first->end(), recursiveFirstSet->begin(), recursiveFirstSet->end());
|
||||||
//Check to see if the current recursiveFirstSet contains NULL, if so, then go through again with the next token. (if there is one)
|
//Check to see if the current recursiveFirstSet contains NULL, if so, then go through again with the next token. (if there is one)
|
||||||
@@ -106,6 +121,7 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
|
|||||||
recFirstSetHasNull = true;
|
recFirstSetHasNull = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
delete recursiveFirstSet;
|
||||||
j++;
|
j++;
|
||||||
} while (recFirstSetHasNull && loadedGrammer[i]->getRightSide().size() > j);
|
} while (recFirstSetHasNull && loadedGrammer[i]->getRightSide().size() > j);
|
||||||
}
|
}
|
||||||
@@ -159,6 +175,7 @@ std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
|
|||||||
for (std::vector<Symbol*>::size_type i = 0; i < symbolFirstSet->size(); i++) {
|
for (std::vector<Symbol*>::size_type i = 0; i < symbolFirstSet->size(); i++) {
|
||||||
if (*((*symbolFirstSet)[i]) == *nullSymbol) {
|
if (*((*symbolFirstSet)[i]) == *nullSymbol) {
|
||||||
symbolFirstSetHasNull = true;
|
symbolFirstSetHasNull = true;
|
||||||
|
symbolFirstSet->erase(symbolFirstSet->begin()+i);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -170,7 +187,17 @@ std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
|
|||||||
symbolFirstSet = rule->getLookahead();
|
symbolFirstSet = rule->getLookahead();
|
||||||
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
|
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
|
||||||
}
|
}
|
||||||
return followSet;
|
std::vector<Symbol*>* followSetReturn = new std::vector<Symbol*>();
|
||||||
|
for (std::vector<Symbol*>::size_type i = 0; i < followSet->size(); i++) {
|
||||||
|
bool alreadyIn = false;
|
||||||
|
for (std::vector<Symbol*>::size_type j = 0; j < followSetReturn->size(); j++)
|
||||||
|
if (*((*followSet)[i]) == *((*followSetReturn)[j]))
|
||||||
|
alreadyIn = true;
|
||||||
|
if (!alreadyIn)
|
||||||
|
followSetReturn->push_back((*followSet)[i]);
|
||||||
|
}
|
||||||
|
delete followSet;
|
||||||
|
return followSetReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Parser::closure(State* state) {
|
void Parser::closure(State* state) {
|
||||||
@@ -185,6 +212,7 @@ void Parser::closure(State* state) {
|
|||||||
if ( !currentStateRule->isAtEnd() && *(currentStateRule->getAtNextIndex()) == *(currentGramRule->getLeftSide())) {
|
if ( !currentStateRule->isAtEnd() && *(currentStateRule->getAtNextIndex()) == *(currentGramRule->getLeftSide())) {
|
||||||
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
|
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
|
||||||
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
|
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
|
||||||
|
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl;
|
||||||
currentGramRule->setLookahead(incrementiveFollowSet(currentStateRule));
|
currentGramRule->setLookahead(incrementiveFollowSet(currentStateRule));
|
||||||
|
|
||||||
//Check to make sure not already in
|
//Check to make sure not already in
|
||||||
@@ -340,7 +368,7 @@ void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* actio
|
|||||||
(*(table[stateNum]))[symbolIndex] = action;
|
(*(table[stateNum]))[symbolIndex] = action;
|
||||||
}
|
}
|
||||||
//If the slot is not empty and does not contain ourself, then it is a conflict
|
//If the slot is not empty and does not contain ourself, then it is a conflict
|
||||||
else if ( *((*(table[stateNum]))[symbolIndex]) != *action) {
|
else if ( !(*(table[stateNum]))[symbolIndex]->equalsExceptLookahead(*action)) {
|
||||||
//std::cout << "not Null!" << std::endl;
|
//std::cout << "not Null!" << std::endl;
|
||||||
std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << std::endl;
|
std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << std::endl;
|
||||||
//Don't overwrite
|
//Don't overwrite
|
||||||
|
|||||||
121
src/RegEx.cpp
121
src/RegEx.cpp
@@ -2,9 +2,15 @@
|
|||||||
|
|
||||||
RegEx::RegEx(std::string inPattern) {
|
RegEx::RegEx(std::string inPattern) {
|
||||||
pattern = inPattern;
|
pattern = inPattern;
|
||||||
|
construct();
|
||||||
|
deperenthesize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegEx::construct() {
|
||||||
std::vector<RegExState*> previousStates;
|
std::vector<RegExState*> previousStates;
|
||||||
std::vector<RegExState*> currentStates;
|
std::vector<RegExState*> currentStates;
|
||||||
std::stack<std::pair<std::vector<RegExState*>, RegExState*> > perenStack;
|
std::stack<std::pair<std::vector<RegExState*>, std::vector<RegExState*> > > perenStack;
|
||||||
|
bool alternating = false;
|
||||||
begin = new RegExState();
|
begin = new RegExState();
|
||||||
currentStates.push_back(begin);
|
currentStates.push_back(begin);
|
||||||
for (int i = 0; i < pattern.length(); i++) {
|
for (int i = 0; i < pattern.length(); i++) {
|
||||||
@@ -42,11 +48,7 @@ RegEx::RegEx(std::string inPattern) {
|
|||||||
{
|
{
|
||||||
std::cout << "Alternation at " << i << " in " << pattern << std::endl;
|
std::cout << "Alternation at " << i << " in " << pattern << std::endl;
|
||||||
//alternation
|
//alternation
|
||||||
i++;
|
alternating = true;
|
||||||
RegExState* next = new RegExState(pattern[i]);
|
|
||||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
|
|
||||||
previousStates[j]->addNext(next);
|
|
||||||
currentStates.push_back(next);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@@ -57,17 +59,35 @@ RegEx::RegEx(std::string inPattern) {
|
|||||||
//Create a peren node with an inner empty node
|
//Create a peren node with an inner empty node
|
||||||
RegExState* next = new RegExState(new RegExState());
|
RegExState* next = new RegExState(new RegExState());
|
||||||
|
|
||||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
if (alternating) {
|
||||||
currentStates[j]->addNext(next);
|
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
|
||||||
|
previousStates[j]->addNext(next);
|
||||||
|
|
||||||
previousStates.clear();
|
//Save both current states here as well as the current preren
|
||||||
//Save both current states here as well as the current preren
|
std::vector<RegExState*> savePreviousStates = previousStates;
|
||||||
std::vector<RegExState*> saveStates = currentStates;
|
currentStates.push_back(next);
|
||||||
// saveStates.insert(saveStates.end(), currentStates.begin(), currentStates.end())
|
std::vector<RegExState*> saveCurrentStates = currentStates;
|
||||||
perenStack.push(std::make_pair(saveStates, next));
|
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
|
||||||
|
|
||||||
currentStates.clear();
|
previousStates.clear();
|
||||||
currentStates.push_back(next->getInner());
|
currentStates.clear();
|
||||||
|
currentStates.push_back(next->getInner());
|
||||||
|
alternating = false;
|
||||||
|
} else {
|
||||||
|
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||||
|
currentStates[j]->addNext(next);
|
||||||
|
|
||||||
|
//Save both current states here as well as the current preren
|
||||||
|
std::vector<RegExState*> savePreviousStates = currentStates;
|
||||||
|
currentStates.clear();
|
||||||
|
currentStates.push_back(next);
|
||||||
|
std::vector<RegExState*> saveCurrentStates = currentStates;
|
||||||
|
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
|
||||||
|
|
||||||
|
previousStates.clear();
|
||||||
|
currentStates.clear();
|
||||||
|
currentStates.push_back(next->getInner());
|
||||||
|
}
|
||||||
std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
|
std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -77,15 +97,16 @@ RegEx::RegEx(std::string inPattern) {
|
|||||||
std::cout << "End peren at " << i << " in " << pattern << std::endl;
|
std::cout << "End peren at " << i << " in " << pattern << std::endl;
|
||||||
//perentheses
|
//perentheses
|
||||||
//Pop off the states that will now be the previous states and the peren node which will now be the current node
|
//Pop off the states that will now be the previous states and the peren node which will now be the current node
|
||||||
std::pair<std::vector<RegExState*>, RegExState*> savedPair = perenStack.top();
|
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
|
||||||
perenStack.pop();
|
perenStack.pop();
|
||||||
//Make the it so
|
//Make the it so
|
||||||
previousStates = savedPair.first;
|
previousStates = savedPair.first;
|
||||||
//Make sure the end of the inner stuff points back to the peren node
|
//Make sure the end of the inner stuff points back to the peren node
|
||||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||||
currentStates[j]->addNext(savedPair.second);
|
currentStates[j]->addNext(savedPair.second[savedPair.second.size()-1]);
|
||||||
|
//currentStates[j]->addNext(*(savedPair.second.end()));
|
||||||
currentStates.clear();
|
currentStates.clear();
|
||||||
currentStates.push_back(savedPair.second);
|
currentStates = savedPair.second;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -93,42 +114,42 @@ RegEx::RegEx(std::string inPattern) {
|
|||||||
{
|
{
|
||||||
i++;
|
i++;
|
||||||
std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
|
std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
|
||||||
//Ahh, it's escaping a special character
|
//Ahh, it's escaping a special character, so fall through to the default.
|
||||||
RegExState* next = new RegExState(pattern[i]);
|
|
||||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
|
||||||
currentStates[j]->addNext(next);
|
|
||||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
previousStates.clear();
|
|
||||||
// previousStates.insert(previousStates.begin(), currentStates.begin(), currentStates.end());
|
|
||||||
previousStates = currentStates;
|
|
||||||
currentStates.clear();
|
|
||||||
currentStates.push_back(next);
|
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
std::cout << "Regular" << std::endl;
|
std::cout << "Regular" << std::endl;
|
||||||
//Ahh, it's regular
|
//Ahh, it's regular
|
||||||
RegExState* next = new RegExState(pattern[i]);
|
RegExState* next = new RegExState(pattern[i]);
|
||||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
//If we're alternating, add next as the next for each previous state, and add self to currentStates
|
||||||
currentStates[j]->addNext(next);
|
if (alternating) {
|
||||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
|
||||||
|
previousStates[j]->addNext(next);
|
||||||
|
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
|
||||||
|
}
|
||||||
|
currentStates.push_back(next);
|
||||||
|
alternating = false;
|
||||||
|
} else {
|
||||||
|
//If we're not alternating, add next as next for all the current states, make the current states the new
|
||||||
|
//previous states, and add ourself as the new current state.
|
||||||
|
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||||
|
currentStates[j]->addNext(next);
|
||||||
|
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||||
|
}
|
||||||
|
previousStates.clear();
|
||||||
|
previousStates = currentStates;
|
||||||
|
currentStates.clear();
|
||||||
|
currentStates.push_back(next);
|
||||||
}
|
}
|
||||||
|
|
||||||
previousStates.clear();
|
|
||||||
// previousStates.insert(previousStates.begin(), currentStates.begin(), currentStates.end());
|
|
||||||
previousStates = currentStates;
|
|
||||||
currentStates.clear();
|
|
||||||
currentStates.push_back(next);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//last one is goal state
|
//last one is goal state
|
||||||
for (std::vector<RegExState*>::size_type i = 0; i < currentStates.size(); i++)
|
for (std::vector<RegExState*>::size_type i = 0; i < currentStates.size(); i++)
|
||||||
currentStates[i]->addNext(NULL);
|
currentStates[i]->addNext(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RegEx::deperenthesize() {
|
||||||
std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
|
std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
|
||||||
|
|
||||||
//Now go through and expand the peren nodes to regular nodes
|
//Now go through and expand the peren nodes to regular nodes
|
||||||
@@ -144,13 +165,13 @@ RegEx::RegEx(std::string inPattern) {
|
|||||||
if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) {
|
if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) {
|
||||||
//Fix all the next references pointing to the peren node to point to the inner nodes. (if more than one, push back to add others)
|
//Fix all the next references pointing to the peren node to point to the inner nodes. (if more than one, push back to add others)
|
||||||
std::vector<RegExState*>* insideNextStates = (*nextStates)[j]->getInner()->getNextStates();
|
std::vector<RegExState*>* insideNextStates = (*nextStates)[j]->getInner()->getNextStates();
|
||||||
std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
|
//std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
|
||||||
RegExState* perenState = (*nextStates)[j];
|
RegExState* perenState = (*nextStates)[j];
|
||||||
(*nextStates)[j] = (*insideNextStates)[0];
|
(*nextStates)[j] = (*insideNextStates)[0];
|
||||||
std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
|
//std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
|
||||||
for (std::vector<RegExState*>::size_type k = 1; k < insideNextStates->size(); k++)
|
for (std::vector<RegExState*>::size_type k = 1; k < insideNextStates->size(); k++)
|
||||||
nextStates->push_back((*insideNextStates)[k]);
|
nextStates->push_back((*insideNextStates)[k]);
|
||||||
std::cout << "Replaced beginning: " << begin->toString() << std::endl;
|
//std::cout << "Replaced beginning: " << begin->toString() << std::endl;
|
||||||
//Now, if the peren node is self-referential (has a repitition operator after i), fix it's self-references in the same manner
|
//Now, if the peren node is self-referential (has a repitition operator after i), fix it's self-references in the same manner
|
||||||
std::vector<RegExState*>* perenNextNodes = perenState->getNextStates();
|
std::vector<RegExState*>* perenNextNodes = perenState->getNextStates();
|
||||||
for (std::vector<RegExState*>::size_type k = 0; k < perenNextNodes->size(); k++) {
|
for (std::vector<RegExState*>::size_type k = 0; k < perenNextNodes->size(); k++) {
|
||||||
@@ -166,17 +187,17 @@ RegEx::RegEx(std::string inPattern) {
|
|||||||
traversalList.push_back(perenState->getInner());
|
traversalList.push_back(perenState->getInner());
|
||||||
for (std::vector<RegExState*>::size_type k = 0; k < traversalList.size(); k++) {
|
for (std::vector<RegExState*>::size_type k = 0; k < traversalList.size(); k++) {
|
||||||
std::vector<RegExState*>* nextTraversalStates = traversalList[k]->getNextStates();
|
std::vector<RegExState*>* nextTraversalStates = traversalList[k]->getNextStates();
|
||||||
std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
|
//std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
|
||||||
std::cout << "with children: ";
|
//std::cout << "with children:" << std::endl;
|
||||||
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
|
//for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
|
||||||
std::cout << (*nextTraversalStates)[l]->getCharacter() << " ";
|
// std::cout << "\t\"" << (*nextTraversalStates)[l]->getCharacter() << "\"" << std::endl;
|
||||||
std::cout << std::endl;
|
//std::cout << std::endl;
|
||||||
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++) {
|
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++) {
|
||||||
//If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren
|
//If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren
|
||||||
//And we now replace this reference with the next nodes from the peren node
|
//And we now replace this reference with the next nodes from the peren node
|
||||||
std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
|
//std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
|
||||||
if ((*nextTraversalStates)[l] == perenState) {
|
if ((*nextTraversalStates)[l] == perenState) {
|
||||||
std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
|
// std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
|
||||||
std::vector<RegExState*> endPerenNextStates = *(perenState->getNextStates());
|
std::vector<RegExState*> endPerenNextStates = *(perenState->getNextStates());
|
||||||
(*nextTraversalStates)[l] = endPerenNextStates[0];
|
(*nextTraversalStates)[l] = endPerenNextStates[0];
|
||||||
for (std::vector<RegExState*>::size_type n = 1; n < endPerenNextStates.size(); n++)
|
for (std::vector<RegExState*>::size_type n = 1; n < endPerenNextStates.size(); n++)
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ std::string RegExState::toString(std::vector<RegExState*>* avoid) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (inAvoid) {
|
if (inAvoid) {
|
||||||
string += "->LoopDetected";
|
string += "->loop";
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -67,7 +67,37 @@ std::string StringReader::getTokens(std::vector<std::string> stop_chars, bool tr
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rd_string[str_pos] == '\"') {
|
if (rd_string[str_pos] == '\"') {
|
||||||
found_pos = rd_string.find("\"", str_pos+1);
|
//See if we have an even or odd number of backslashes (that is, this quote is not or is escaped)
|
||||||
|
int numBackslashes = 0;
|
||||||
|
int countBack = 1;
|
||||||
|
while (str_pos-countBack >= 0 && rd_string[str_pos-countBack] == '\\') {
|
||||||
|
numBackslashes++;
|
||||||
|
countBack++;
|
||||||
|
}
|
||||||
|
//If the quote is not escaped
|
||||||
|
if (numBackslashes % 2 == 0) {
|
||||||
|
//Find the next quote
|
||||||
|
found_pos = rd_string.find("\"", str_pos+1);
|
||||||
|
//Check to see if the quote is escaped
|
||||||
|
numBackslashes = 0;
|
||||||
|
countBack = 1;
|
||||||
|
while (found_pos-countBack >= 0 && rd_string[found_pos-countBack] == '\\') {
|
||||||
|
numBackslashes++;
|
||||||
|
countBack++;
|
||||||
|
}
|
||||||
|
//While the quote is escaped
|
||||||
|
while (numBackslashes % 2 == 1) {
|
||||||
|
//find the next quote
|
||||||
|
found_pos = rd_string.find("\"", found_pos+1);
|
||||||
|
//Check to see if it's escaped
|
||||||
|
numBackslashes = 0;
|
||||||
|
countBack = 1;
|
||||||
|
while (found_pos-countBack >= 0 && rd_string[found_pos-countBack] == '\\') {
|
||||||
|
numBackslashes++;
|
||||||
|
countBack++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (found_pos == str_pos) //We are at the endline
|
if (found_pos == str_pos) //We are at the endline
|
||||||
|
|||||||
Reference in New Issue
Block a user