diff --git a/include/NodeTree.h b/include/NodeTree.h index 83cd6f7..3993269 100644 --- a/include/NodeTree.h +++ b/include/NodeTree.h @@ -28,9 +28,11 @@ class NodeTree { std::vector*> getParents(); void addChild(NodeTree* child); + void addChildren(std::vector*>* children); int findChild(NodeTree* child); void removeChild(NodeTree* child); void removeChild(int index); + void clearChildren(); std::vector*> getChildren(); NodeTree* get(int index); @@ -111,6 +113,12 @@ void NodeTree::addChild(NodeTree* child) { children.push_back(child); } +template +void NodeTree::addChildren(std::vector*>* children) { + for (std::vector*>::size_type i = 0; i < children->size(); i++) + addChild((*children)[i]); +} + template int NodeTree::findChild(NodeTree* child) { for (int i = 0; i < children.size(); i++) { @@ -135,6 +143,13 @@ void NodeTree::removeChild(NodeTree* child) { } } +template +void NodeTree::clearChildren() { + for (std::vector::size_type i = 0; i < children.size(); i++) + children[i] = NULL; + children.clear(); +} + template std::vector*> NodeTree::getChildren() { return children; diff --git a/include/ParseRule.h b/include/ParseRule.h index ba5cca9..9f36367 100644 --- a/include/ParseRule.h +++ b/include/ParseRule.h @@ -37,6 +37,7 @@ class ParseRule { bool isAtEnd(); void setLookahead(std::vector* lookahead); + void addLookahead(std::vector* lookahead); std::vector* getLookahead(); std::string toString(); diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h index 0edb3c1..dc21036 100644 --- a/include/RNGLRParser.h +++ b/include/RNGLRParser.h @@ -1,3 +1,6 @@ +#ifndef RNGLRPARSER_H +#define RNGLRPARSER_H + #include #include #include "Parser.h" @@ -8,16 +11,30 @@ class RNGLRParser: public Parser { RNGLRParser(); ~RNGLRParser(); NodeTree* parseInput(std::string inputString); + + private: void reducer(int i); void shifter(int i); + void addChildren(NodeTree* parent, std::vector*> children, int nullablePartsIndex); + void addStates(std::vector< State* >* stateSets, State* state); bool reducesToNull(ParseRule* rule); bool reducesToNull(ParseRule* rule, std::vector avoidList); - private: + + bool belongsToFamily(NodeTree* node, std::vector*>* nodes); + bool arePacked(std::vector*>* nodes); + bool isPacked(NodeTree* node); + void setPacked(NodeTree* node, bool isPacked) + std::vector input; GraphStructuredStack gss; //start node, lefthand side of the reduction, reduction length std::queue*, Symbol*>, int > > toReduce; //Node coming from, state going to std::queue< std::pair*, int> > toShift; + + std::vector*> nullableParts; + std::map*, bool> packedMap; }; + +#endif diff --git a/main.cpp b/main.cpp index c33d62b..956b138 100644 --- a/main.cpp +++ b/main.cpp @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) { //std::cout << "Doing stateSetToString from Main" << std::endl; std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl; std::cout << parser.stateSetToString() << std::endl; - //std::cout << "finished stateSetToString from Main" << std::endl; + std::cout << "finished stateSetToString from Main" << std::endl; std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl; std::cout << parser.tableToString() << std::endl; std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl; diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 1dda671..5373c9b 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -31,7 +31,7 @@ Symbol* Lexer::next() { RegEx* longestRegEx = NULL; std::string remainingString = input.substr(currentPosition,input.length()-1); for (std::vector::size_type i = 0; i < regExs.size(); i++) { - std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl; + //std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl; int currentMatch = regExs[i]->longMatch(remainingString); if (currentMatch > longestMatch) { longestMatch = currentMatch; @@ -39,9 +39,10 @@ Symbol* Lexer::next() { } } if (longestRegEx != NULL) { + std::string eatenString = input.substr(currentPosition, longestMatch+1); currentPosition += longestMatch + 1; - std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <getPattern(), true); + //std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <getPattern(), true, eatenString); } else { std::cout << "Found no applicable regex" << std::endl; std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl; diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index 41254c3..6d5aced 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -89,6 +89,20 @@ void ParseRule::setLookahead(std::vector* lookahead) { this->lookahead = lookahead; } +void ParseRule::addLookahead(std::vector* lookahead) { + for (std::vector::size_type i = 0; i < lookahead->size(); i++) { + bool alreadyIn = false; + for (std::vector::size_type j = 0; j < this->lookahead->size(); j++) { + if (*((*lookahead)[i]) == *((*(this->lookahead))[j])) { + alreadyIn = true; + break; + } + } + if (!alreadyIn) + this->lookahead->push_back((*lookahead)[i]); + } +} + std::vector* ParseRule::getLookahead() { return lookahead; } diff --git a/src/Parser.cpp b/src/Parser.cpp index 8927265..0097fc2 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -78,6 +78,35 @@ void Parser::loadGrammer(std::string grammerInputString) { std::cout << loadedGrammer[i]->toString() << std::endl; } +void Parser::createStateSet() { + std::cout << "Begining creation of stateSet" << std::endl; + //First state has no parents + + //Set the first state's basis to be the goal rule with lookahead EOF + ParseRule* goalRule = loadedGrammer[0]->clone(); + std::vector* goalRuleLookahead = new std::vector(); + goalRuleLookahead->push_back(EOFSymbol); + goalRule->setLookahead(goalRuleLookahead); + stateSets.push_back( new State(0, goalRule)); + //std::cout << "Begining for main set for loop" << std::endl; + for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) { + //closure + closure(stateSets[i]); + //Add the new states + addStates(&stateSets, stateSets[i]); + } + table.remove(1, EOFSymbol); +} + +int Parser::stateNum(State* state) { + for (std::vector::size_type i = 0; i < stateSets.size(); i++) { + if (*(stateSets[i]) == *state) { + return i; + } + } + return -1; +} + std::vector* Parser::firstSet(Symbol* token) { std::vector avoidList; return firstSet(token, avoidList); @@ -131,35 +160,6 @@ std::vector* Parser::firstSet(Symbol* token, std::vector avoid return(first); } -void Parser::createStateSet() { - std::cout << "Begining creation of stateSet" << std::endl; - //First state has no parents - - //Set the first state's basis to be the goal rule with lookahead EOF - ParseRule* goalRule = loadedGrammer[0]->clone(); - std::vector* goalRuleLookahead = new std::vector(); - goalRuleLookahead->push_back(EOFSymbol); - goalRule->setLookahead(goalRuleLookahead); - stateSets.push_back( new State(0, goalRule)); - //std::cout << "Begining for main set for loop" << std::endl; - for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) { - //closure - closure(stateSets[i]); - //Add the new states - addStates(&stateSets, stateSets[i]); - } - table.remove(1, EOFSymbol); -} - -int Parser::stateNum(State* state) { - for (std::vector::size_type i = 0; i < stateSets.size(); i++) { - if (*(stateSets[i]) == *state) { - return i; - } - } - return -1; -} - //Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { //Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end) @@ -181,7 +181,7 @@ std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { } } followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end()); - delete symbolFirstSet; + //delete symbolFirstSet; rule->advancePointer(); } if (rule->isAtEnd()) { @@ -192,8 +192,10 @@ std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { for (std::vector::size_type i = 0; i < followSet->size(); i++) { bool alreadyIn = false; for (std::vector::size_type j = 0; j < followSetReturn->size(); j++) - if (*((*followSet)[i]) == *((*followSetReturn)[j])) + if (*((*followSet)[i]) == *((*followSetReturn)[j])) { alreadyIn = true; + break; + } if (!alreadyIn) followSetReturn->push_back((*followSet)[i]); } @@ -219,7 +221,9 @@ void Parser::closure(State* state) { //Check to make sure not already in bool isAlreadyInState = false; for (std::vector::size_type k = 0; k < stateTotal->size(); k++) { - if (*((*stateTotal)[k]) == *currentGramRule) { + if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) { + std::cout << (*stateTotal)[k]->toString() << std::endl; + (*stateTotal)[k]->addLookahead(currentGramRule->getLookahead()); isAlreadyInState = true; break; } diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 1fedad0..37ef9e6 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -112,6 +112,7 @@ void RNGLRParser::reducer(int i) { gss.addEdge(toStateNode, currentReached); if (reduction.second != 0) { //Do all non null reduction + std::cout << "Checking for non-null reductions in states that already existed" << std::endl; std::vector actions = *(table.get(toState, input[i])); for (std::vector::size_type k = 0; k < actions.size(); k++) if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0) @@ -123,9 +124,10 @@ void RNGLRParser::reducer(int i) { gss.addToFrontier(i, toStateNode); gss.addEdge(toStateNode, currentReached); + std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl; std::vector actions = *(table.get(toState, input[i])); for (std::vector::size_type k = 0; k < actions.size(); k++) { - //Shift + std::cout << "Action is " << actions[k]->toString() << std::endl; if (actions[k]->action == ParseAction::SHIFT) toShift.push(std::make_pair(toStateNode, actions[k]->shiftState)); else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0) @@ -175,6 +177,48 @@ void RNGLRParser::shifter(int i) { } } +void RNGLRParser::addChildren(NodeTree* parent, std::vector*>* children, int nullablePartsIndex) { + if (nullablePartsIndex != 0) + children->push_back(nullableParts[nullablePartsIndex]); + if (!belongsToFamily(parent, children)) { + if (parent->getChildren().size() == 0) { + parent->addChildren(children); + } else { + if (!arePacked(parent->getChildren())) { + NodeTree* subParent = new NodeTree(); + setPacked(subParent, true); + subParent->addChildren(&(parent->getChildren()); + parent->clearChildren(); + parent->addChild(subParent); + } + NodeTree* t = new NodeTree(); + setPacked(t, true); + parent->addChild(t); + t->addChildren(children); + } + } +} + +bool RNGLRParser::belongsToFamily(NodeTree* node, std::vector*>* nodes) { + // +} + +bool RNGLRParser::arePacked(std::vector*>* nodes) { + bool packed = true; + for (std::vector*>::size_type i = 0; i < nodes->size(); i++) + packed &= packedMap[node]; + return packed; +} + +bool RNGLRParser::isPacked(NodeTree* node) { + return packedMap[node]; +} + +void RNGLRParser::setPacked(NodeTree* node, bool isPacked) { + packedMap[node] = isPacked; +} + + //Have to use own add states function in order to construct RN table instead of LALR table void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) { std::vector< State* > newStates; diff --git a/src/RegEx.cpp b/src/RegEx.cpp index 40b740f..4599ac0 100644 --- a/src/RegEx.cpp +++ b/src/RegEx.cpp @@ -243,9 +243,9 @@ int RegEx::longMatch(std::string stringToMatch) { for (std::vector::size_type j = 0; j < currentStates.size(); j++) { if (currentStates[j]->isGoal()) { lastMatch = i-1; - std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl; + //std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl; } else { - std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <toString() << " is not goal" <* addStates = currentStates[j]->advance(stringToMatch.at(i)); nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());