From 9887555dd54bb4774ef288413169f4b6fc917326 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Wed, 31 Jul 2013 23:51:05 -0400 Subject: [PATCH] Have almost got the RNGLR recognizer. Something is wrong with the GSS, I think when it's built. It seems to sometimes have parents as children, or something. --- CMakeLists.txt | 2 +- include/GraphStructuredStack.h | 9 ++- include/LALRParser.h | 17 ----- include/NodeTree.h | 3 +- include/RNGLRParser.h | 17 +++-- include/Symbol.h | 1 + include/Table.h | 1 + main.cpp | 4 +- src/GraphStructuredStack.cpp | 41 ++++++++--- src/RNGLRParser.cpp | 124 +++++++++++++++++++++++---------- src/Symbol.cpp | 4 ++ src/Table.cpp | 23 +++++- 12 files changed, 169 insertions(+), 77 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 131db54..ea7251e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/GraphStructuredStack.h b/include/GraphStructuredStack.h index dba8123..f24cdfd 100644 --- a/include/GraphStructuredStack.h +++ b/include/GraphStructuredStack.h @@ -1,6 +1,8 @@ #include #include -#include "NodeTree.h" +#include +#include "NodeTree.h" +#include "util.h" #ifndef GRAPH_STRUCTURED_STACK #define GRAPH_STRUCTURED_STACK @@ -17,7 +19,10 @@ class GraphStructuredStack { std::vector*>* getReachable(NodeTree* start, int lenght); bool hasEdge(NodeTree* start, NodeTree* end); void addEdge(NodeTree* start, NodeTree* end); + + std::string toString(); private: std::vector*>*> gss; - // }; + +#endif diff --git a/include/LALRParser.h b/include/LALRParser.h index ef711e7..e8c35fc 100644 --- a/include/LALRParser.h +++ b/include/LALRParser.h @@ -2,24 +2,7 @@ #define LALRPARSER_H #include "Parser.h" -/* -#include "util.h" -#include "ParseRule.h" -#include "ParseAction.h" -#include "Symbol.h" -#include "State.h" -#include "StringReader.h" -#include "Lexer.h" -#include "NodeTree.h" -#include "Table.h" -#include -#include -#include -#include -#include -#include -*/ class LALRParser: public Parser { public: LALRParser(); diff --git a/include/NodeTree.h b/include/NodeTree.h index 24b89f4..83cd6f7 100644 --- a/include/NodeTree.h +++ b/include/NodeTree.h @@ -70,7 +70,6 @@ NodeTree::NodeTree() { template NodeTree::NodeTree(std::string name, T inData) { - data = NULL; this->name = name; this->data = inData; id = idCounter++; @@ -138,7 +137,7 @@ void NodeTree::removeChild(NodeTree* child) { template std::vector*> NodeTree::getChildren() { - return &children; + return children; } template diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h index d1113ea..d8ed0be 100644 --- a/include/RNGLRParser.h +++ b/include/RNGLRParser.h @@ -1,17 +1,20 @@ - #include +#include +#include "Parser.h" +#include "GraphStructuredStack.h" -class RNGLRParser { +class RNGLRParser: public Parser { public: - parseInput(std::string inputString); - reducer(int i); - shifter(int i); + RNGLRParser(); + ~RNGLRParser(); + NodeTree* parseInput(std::string inputString); + void reducer(int i); + void shifter(int i); private: - Lexer lexer; std::vector input; GraphStructuredStack gss; //start node, lefthand side of the reduction, reduction length - std::queue*, Symbol*>, int > toReduce; + std::queue*, Symbol*>, int > > toReduce; //Node coming from, state going to std::queue< std::pair*, int> > toShift; }; diff --git a/include/Symbol.h b/include/Symbol.h index cc75b8b..bc96f1b 100644 --- a/include/Symbol.h +++ b/include/Symbol.h @@ -20,6 +20,7 @@ class Symbol { Symbol(std::string name, bool isTerminal, NodeTree* tree); ~Symbol(); bool const operator==(const Symbol &other); + bool const operator!=(const Symbol &other); std::string getName(); std::string toString(); Symbol* clone(); diff --git a/include/Table.h b/include/Table.h index 7becf30..659111b 100644 --- a/include/Table.h +++ b/include/Table.h @@ -14,6 +14,7 @@ class Table { void setSymbols(Symbol* EOFSymbol, Symbol* nullSymbol); void add(int stateNum, Symbol* tranSymbol, ParseAction* action); std::vector* get(int state, Symbol* token); + ParseAction* getShift(int state, Symbol* token); std::string toString(); private: std::vector< std::vector< std::vector* >* > table; diff --git a/main.cpp b/main.cpp index ac97e29..c33d62b 100644 --- a/main.cpp +++ b/main.cpp @@ -2,6 +2,7 @@ #include "Symbol.h" #include "Lexer.h" #include "LALRParser.h" +#include "RNGLRParser.h" #include #include #include @@ -43,7 +44,8 @@ int main(int argc, char* argv[]) { programInputFileString.append(line+"\n"); } - LALRParser parser; + //LALRParser parser; + RNGLRParser parser; parser.loadGrammer(grammerInputFileString); //std::cout << "Creating State Set from Main" << std::endl; std::cout << "\n\n\n\n\n\n\n\n\n\nState Set" << std::endl; diff --git a/src/GraphStructuredStack.cpp b/src/GraphStructuredStack.cpp index c586192..83a57cf 100644 --- a/src/GraphStructuredStack.cpp +++ b/src/GraphStructuredStack.cpp @@ -14,14 +14,16 @@ NodeTree* GraphStructuredStack::newNode(int stateNum) { void GraphStructuredStack::addToFrontier(int frontier, NodeTree* node) { //First, make sure our vector has this and lesser frontiers. If not, add it and up to it - while (frontier >= gss.size()) { + while (gss.size() <= frontier) { + std::cout << "Adding a new frontier: " << gss.size() << std::endl; gss.push_back(new std::vector*>()); } + std::cout << "Adding " << node << " (" << node->getData() << ") to frontier " << frontier << std::endl; gss[frontier]->push_back(node); } NodeTree* GraphStructuredStack::inFrontier(int frontier, int state) { - if (frontierIsEmpty()) + if (frontierIsEmpty(frontier)) return NULL; for (std::vector*>::size_type i = 0; i < gss[frontier]->size(); i++) { if ((*(gss[frontier]))[i]->getData() == state) @@ -39,24 +41,31 @@ bool GraphStructuredStack::frontierHasAccState(int frontier) { return inFrontier(frontier, 1); } -std::vector*>* GraphStructuredStack::getReachable(NodeTree* start, int lenght) { +std::vector*>* GraphStructuredStack::getReachable(NodeTree* start, int length) { std::vector*>* reachableList = new std::vector*>(); std::queue*> currentNodes; std::queue*> nextNodes; - currentNodes.push_back(start); - for (int i = 0; i < lenght; i++) { + currentNodes.push(start); + for (int i = 0; i < length; i++) { while (!currentNodes.empty()) { NodeTree* currentNode = currentNodes.front(); currentNodes.pop(); - std::vector*> children = currentNode->getChildren(); - for (std::vector*>::size_type j = 0; j < children.size(); j++) - nextNodes.push_back(children[j]); + std::vector*> children = currentNode->getChildren(); + std::cout << currentNode->getData() << " has children "; + for (std::vector*>::size_type j = 0; j < children.size(); j++) { + std::cout << children[j]->getData() << " "; + nextNodes.push(children[j]); + } + std::cout << std::endl; } currentNodes = nextNodes; - nextNodes.clear(); + //No clear function, so go through and remove + while(!nextNodes.empty()) + nextNodes.pop(); } while (!currentNodes.empty()) { reachableList->push_back(currentNodes.front()); + std::cout << currentNodes.front()->getData() << " is reachable from " << start->getData() << " by length " << length << std::endl; currentNodes.pop(); } return reachableList; @@ -70,4 +79,16 @@ bool GraphStructuredStack::hasEdge(NodeTree* start, NodeTree* end) { void GraphStructuredStack::addEdge(NodeTree* start, NodeTree* end) { start->addChild(end); end->addChild(start); -} \ No newline at end of file +} + +std::string GraphStructuredStack::toString() { + std::string tostring = ""; + for (std::vector*>*>::size_type i = 0; i < gss.size(); i++) { + tostring += "Frontier: " + intToString(i) + "\n"; + for (std::vector*>::size_type j = 0; j < gss[i]->size(); j++) { + tostring += "|" + intToString((*(gss[i]))[j]->getData()) + "| "; + } + tostring += "\n"; + } + return tostring; +} diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 1e78d0d..79462fb 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -1,13 +1,28 @@ +#include "RNGLRParser.h" -RNGLRParser::parseInput(std::string inputString) { +RNGLRParser::RNGLRParser() { + // +} + +RNGLRParser::~RNGLRParser() { + // +} + +NodeTree* RNGLRParser::parseInput(std::string inputString) { //Check for no tokens + bool accepting = false; if (inputString == "") { - if (table.get(0,EOFSymbol)->action == ParseAction::REDUCE) + std::vector* zeroStateActions = table.get(0,EOFSymbol); + for (int i = 0; i < zeroStateActions->size(); i++) { + if ((*zeroStateActions)[i]->action == ParseAction::REDUCE) + accepting = true; + } + if (accepting) std::cout << "Accepted!" << std::endl; else std::cout << "Rejected, no input (with no accepting state)" << std::endl; - return; + return new NodeTree(); } lexer.setInput(inputString); @@ -15,51 +30,86 @@ RNGLRParser::parseInput(std::string inputString) { //It could be converted to on-line later. Symbol* currentToken = lexer.next(); input.push_back(currentToken); - while (*currentToken != *EOFToken) { + while (*currentToken != *EOFSymbol) { + std::cout << EOFSymbol->toString() << " " << currentToken->toString() << std::endl; currentToken = lexer.next(); input.push_back(currentToken); } + std::cout << "\n\n\nDone with Lexing\n\n\n" << std::endl; + + + for (int i = 0; i < input.size(); i++) + std::cout << "|" << input[i]->toString() << "|"; + std::cout << std::endl; + + + std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl; + //Frontier 0, new node with state 0 NodeTree* v0 = gss.newNode(0); gss.addToFrontier(0,v0); - std::vector firstActions = table.get(0, input[0]); + std::cout << "Done setting up new frontier" << std::endl; + + std::vector firstActions = *(table.get(0, input[0])); for (std::vector::size_type i = 0; i < firstActions.size(); i++) { if (firstActions[i]->action == ParseAction::SHIFT) - toShift.push_back(std::make_pair(v0,firstActions[i]->toState())); - else if (firstActions[i]->action == ParseAction::REDUCE && firstActions[i]->reduceRule->getRightSide()->size() == 0) { - toReduce.push_back(std::make_pair(std::make_pair(v0, firstActions[i]->reduceRule->getLeftSide()), 0)); + toShift.push(std::make_pair(v0,firstActions[i]->shiftState)); + else if (firstActions[i]->action == ParseAction::REDUCE && firstActions[i]->reduceRule->getRightSide().size() == 0) { + toReduce.push(std::make_pair(std::make_pair(v0, firstActions[i]->reduceRule->getLeftSide()), 0)); } } + std::cout << "GSS:\n" << gss.toString() << std::endl; + + std::cout << "Starting parse loop" << std::endl; + for (int i = 0; i < input.size(); i++) { - if (gss.frontierIsEmpty(i)) + std::cout << "Checking if frontier " << i << " is empty" << std::endl; + if (gss.frontierIsEmpty(i)) { + std::cout << "Frontier " << i << " is empty." << std::endl; break; - while (toReduce.size() != 0) + } + while (toReduce.size() != 0) { + std::cout << "Reducing for " << i << std::endl; + //std::cout << "GSS:\n" << gss.toString() << std::endl; reducer(i); + } + std::cout << "Shifting for " << i << std::endl; shifter(i); + std::cout << "GSS:\n" << gss.toString() << std::endl; } - if (gss.frontierHasAccSt(input.size()-1)) + std::cout << "Done with parsing loop, checking for acceptance" << std::endl; + if (gss.frontierHasAccState(input.size()-1)) std::cout << "Accepted!" << std::endl; else std::cout << "Rejected!" << std::endl; - return; + + std::cout << "GSS:\n" << gss.toString() << std::endl; + return new NodeTree(); } -RNGLRParser::reducer(int i) { +void RNGLRParser::reducer(int i) { std::pair< std::pair*, Symbol*>, int > reduction = toReduce.front(); - int pathLength = reduction.second > 0 : reduction.second -1 ? 0; + toReduce.pop(); + std::cout << "Doing reduction of length " << reduction.second << " from state " << reduction.first.first->getData() << " to symbol " << reduction.first.second->toString() << std::endl; + int pathLength = reduction.second > 0 ? reduction.second -1 : 0; std::vector*>* reachable = gss.getReachable(reduction.first.first, pathLength); for (std::vector*>::size_type j = 0; j < reachable->size(); j++) { NodeTree* currentReached = (*reachable)[j]; - int toState = table.getShift(currentReached->state(), reduction.first.second); + std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.first.second->toString() << std::endl; + int toState = table.getShift(currentReached->getData(), reduction.first.second)->shiftState; NodeTree* toStateNode = gss.inFrontier(i, toState); if (toStateNode) { if (!gss.hasEdge(toStateNode, currentReached)) { gss.addEdge(toStateNode, currentReached); if (reduction.second != 0) { - //Do all non null reductions + //Do all non null reduction + std::vector actions = *(table.get(toState, input[i])); + for (std::vector::size_type k = 0; k < actions.size(); k++) + if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0) + toReduce.push(std::make_pair(std::make_pair(currentReached, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->getRightSize())); } } } else { @@ -67,47 +117,51 @@ RNGLRParser::reducer(int i) { gss.addToFrontier(i, toStateNode); gss.addEdge(toStateNode, currentReached); - std::vector actions = table.get(toState, input[i+1]); + std::vector actions = *(table.get(toState, input[i])); for (std::vector::size_type k = 0; k < actions.size(); k++) { //Shift if (actions[k]->action == ParseAction::SHIFT) - nextShifts.push_back(std::make_pair(toStateNode, actions[k]->shiftState)); - else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule()->size() != 0) - toReduce.push_back(std::make_pair(std::make_pair(currentReached, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->size())); - else (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule()->size() == 0) - toReduce.push_back(std::make_pair(std::make_pair(toStateNode, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->size())); + toShift.push(std::make_pair(toStateNode, actions[k]->shiftState)); + else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0) + toReduce.push(std::make_pair(std::make_pair(currentReached, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->getRightSize())); + else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() == 0) + toReduce.push(std::make_pair(std::make_pair(toStateNode, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->getRightSize())); } - } } } -RNGLRParser::shifter(int i) { - if (i != input.length()-1) { - std::queue nextShifts; +void RNGLRParser::shifter(int i) { + if (i != input.size()-1) { + std::queue< std::pair*, int> > nextShifts; while (!toShift.empty()) { std::pair*, int> shift = toShift.front(); + toShift.pop(); + std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl; NodeTree* shiftTo = gss.inFrontier(i+1, shift.second); if (shiftTo) { + std::cout << "State already existed, just adding edge" << std::endl; gss.addEdge(shiftTo, shift.first); - std::vector actions = table.get(shift.second, input[i+2]); + std::vector actions = *(table.get(shift.second, input[i+1])); for (std::vector::size_type j = 0; j < actions.size(); j++) { - if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule->size() != 0) - toReduce.push_back(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size())); + if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule->getRightSize() != 0) + toReduce.push(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->getRightSize())); } } else { + std::cout << "State did not already exist, adding" << std::endl; shiftTo = gss.newNode(shift.second); gss.addToFrontier(i+1, shiftTo); gss.addEdge(shiftTo, shift.first); - std::vector actions = table.get(shift.toState(), input[i+2]); + std::vector actions = *(table.get(shift.second, input[i+1])); for (std::vector::size_type j = 0; j < actions.size(); j++) { + std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl; //Shift if (actions[j]->action == ParseAction::SHIFT) - nextShifts.push_back(std::make_pair(shiftTo, actions[j]->shiftState)); - else if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule()->size() != 0) - toReduce.push_back(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size())); - else (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule()->size() == 0) - toReduce.push_back(std::make_pair(std::make_pair(shiftTo, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size())); + nextShifts.push(std::make_pair(shiftTo, actions[j]->shiftState)); + else if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule->getRightSize() != 0) + toReduce.push(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->getRightSize())); + else if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule->getRightSize() == 0) + toReduce.push(std::make_pair(std::make_pair(shiftTo, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->getRightSize())); } } } diff --git a/src/Symbol.cpp b/src/Symbol.cpp index 17a2f2b..116e38f 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -28,6 +28,10 @@ const bool Symbol::operator==(const Symbol &other) { return( name == other.name && terminal == other.terminal); } +const bool Symbol::operator!=(const Symbol &other) { + return(!this->operator==(other)); +} + std::string Symbol::getName() { return(name); } diff --git a/src/Table.cpp b/src/Table.cpp index 1f88af5..39373c3 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -80,10 +80,17 @@ std::vector* Table::get(int state, Symbol* token) { } } - //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec - //(This assumes singular goal assignment, a simplification for now) + if (symbolIndex == -1) { + std::cout << "Unrecognized symbol: " << token->toString() << ", cannot get from table!" << std::endl; + return NULL; + } + + std::cout << "Get for state: " << state << ", and Symbol: " << token->toString() << std::endl; + std::vector* action = (*(table[state]))[symbolIndex]; + //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec + //(This assumes singular goal assignment, a simplification for now) if (state == 1 && symbolIndex == 0) { if (action == NULL) action = new std::vector(); @@ -106,6 +113,18 @@ std::vector* Table::get(int state, Symbol* token) { return (action); } +ParseAction* Table::getShift(int state, Symbol* token) { + std::vector* actions = get(state, token); + ParseAction* shift = NULL; + for (int i = 0; i < actions->size(); i++) { + if ((*actions)[i]->action == ParseAction::SHIFT) { + shift = (*actions)[i]; + break; + } + } + return shift; +} + std::string Table::toString() { std::string concat = ""; for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++)