From 2eaf6408552a10d3a90774864b4df15c85608898 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Fri, 16 Aug 2013 00:03:26 -0400 Subject: [PATCH] In progress --- include/RNGLRParser.h | 1 + include/State.h | 3 ++ main.cpp | 20 ++++---- src/GraphStructuredStack.cpp | 8 +-- src/Lexer.cpp | 6 +-- src/Parser.cpp | 6 +-- src/RNGLRParser.cpp | 94 +++++++++++++++++++----------------- src/RegEx.cpp | 4 +- src/State.cpp | 52 +++++++++++++++++--- src/Table.cpp | 19 ++++++-- 10 files changed, 137 insertions(+), 76 deletions(-) diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h index 88672da..a46eda1 100644 --- a/include/RNGLRParser.h +++ b/include/RNGLRParser.h @@ -23,6 +23,7 @@ class RNGLRParser: public Parser { void addChildren(NodeTree* parent, std::vector*>* children, NodeTree* nullableParts); void addStates(std::vector< State* >* stateSets, State* state); + void addStateReductionsToTable(State* state); bool fullyReducesToNull(ParseRule* rule); bool reducesToNull(ParseRule* rule); bool reducesToNull(ParseRule* rule, std::vector avoidList); diff --git a/include/State.h b/include/State.h index 6a8fb3d..7a6a52c 100644 --- a/include/State.h +++ b/include/State.h @@ -20,13 +20,16 @@ class State { ~State(); bool const operator==(const State &other); bool const basisEquals(const State &other); + bool const basisEqualsExceptLookahead(const State &other); bool const operator!=(const State &other); std::vector* getBasis(); std::vector* getRemaining(); std::vector* getTotal(); bool containsRule(ParseRule* rule); + void addRuleCombineLookahead(ParseRule* rule); std::string toString(); + void combineStates(State &other); void addParents(std::vector* parents); std::vector* getParents(); std::vector* getDeepParents(int depth); diff --git a/main.cpp b/main.cpp index 956b138..c013e1d 100644 --- a/main.cpp +++ b/main.cpp @@ -48,29 +48,29 @@ int main(int argc, char* argv[]) { RNGLRParser parser; parser.loadGrammer(grammerInputFileString); //std::cout << "Creating State Set from Main" << std::endl; - std::cout << "\n\n\n\n\n\n\n\n\n\nState Set" << std::endl; + std::cout << "\nState Set" << std::endl; parser.createStateSet(); //std::cout << "finished State Set from Main" << std::endl; //std::cout << "Doing stateSetToString from Main" << std::endl; - std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl; + // std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl; std::cout << parser.stateSetToString() << std::endl; std::cout << "finished stateSetToString from Main" << std::endl; - std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl; - std::cout << parser.tableToString() << std::endl; - std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl; - std::cout << grammerInputFileString << std::endl; - std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer toString" << std::endl; - std::cout << parser.grammerToString() << std::endl; + std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl; + std::cout << parser.tableToString() << std::endl; + // std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl; + // std::cout << grammerInputFileString << std::endl; + // std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer toString" << std::endl; + // std::cout << parser.grammerToString() << std::endl; //std::cout << parser.grammerToDOT() << std::endl; //outFile << parser.grammerToDOT() << std::endl; - std::cout << "\n\n\n\n\n\n\n\n\n\nParsing" << std::endl; + std::cout << "\nParsing" << std::endl; std::cout << programInputFileString << std::endl; NodeTree* parseTree = parser.parseInput(programInputFileString); if (parseTree) { - std::cout << parseTree->DOTGraphString() << std::endl; + //std::cout << parseTree->DOTGraphString() << std::endl; outFile << parseTree->DOTGraphString() << std::endl; } diff --git a/src/GraphStructuredStack.cpp b/src/GraphStructuredStack.cpp index a125dcc..0240eac 100644 --- a/src/GraphStructuredStack.cpp +++ b/src/GraphStructuredStack.cpp @@ -15,10 +15,10 @@ NodeTree* GraphStructuredStack::newNode(int stateNum) { void GraphStructuredStack::addToFrontier(int frontier, NodeTree* node) { //First, make sure our vector has this and lesser frontiers. If not, add it and up to it while (gss.size() <= frontier) { - std::cout << "Adding a new frontier: " << gss.size() << std::endl; + //std::cout << "Adding a new frontier: " << gss.size() << std::endl; gss.push_back(new std::vector*>()); } - std::cout << "Adding " << node << " (" << node->getData() << ") to frontier " << frontier << std::endl; + //std::cout << "Adding " << node << " (" << node->getData() << ") to frontier " << frontier << std::endl; gss[frontier]->push_back(node); } @@ -63,7 +63,7 @@ std::vector*>* GraphStructuredStack::getReachable(NodeTree* s NodeTree* currentNode = currentNodes.front(); currentNodes.pop(); std::vector*> children = currentNode->getChildren(); - std::cout << currentNode->getData() << " has children "; + //std::cout << currentNode->getData() << " has children "; for (std::vector*>::size_type j = 0; j < children.size(); j++) { std::cout << children[j]->getData() << " "; nextNodes.push(children[j]); @@ -77,7 +77,7 @@ std::vector*>* GraphStructuredStack::getReachable(NodeTree* s } while (!currentNodes.empty()) { reachableList->push_back(currentNodes.front()); - std::cout << currentNodes.front()->getData() << " is reachable from " << start->getData() << " by length " << length << std::endl; + //std::cout << currentNodes.front()->getData() << " is reachable from " << start->getData() << " by length " << length << std::endl; currentNodes.pop(); } return reachableList; diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 5373c9b..937787e 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -23,7 +23,7 @@ void Lexer::addRegEx(std::string regExString) { } Symbol* Lexer::next() { - std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <= input.length()-1) return new Symbol("$EOF$", true); @@ -44,8 +44,8 @@ Symbol* Lexer::next() { //std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <getPattern(), true, eatenString); } else { - std::cout << "Found no applicable regex" << std::endl; - std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl; + //std::cout << "Found no applicable regex" << std::endl; + //std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl; return NULL; } } \ No newline at end of file diff --git a/src/Parser.cpp b/src/Parser.cpp index 0097fc2..d3481fb 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -74,8 +74,8 @@ void Parser::loadGrammer(std::string grammerInputString) { } std::cout << "Parsed!\n"; - for (std::vector::size_type i = 0; i < loadedGrammer.size(); i++) - std::cout << loadedGrammer[i]->toString() << std::endl; + // for (std::vector::size_type i = 0; i < loadedGrammer.size(); i++) + // std::cout << loadedGrammer[i]->toString() << std::endl; } void Parser::createStateSet() { @@ -222,7 +222,7 @@ void Parser::closure(State* state) { bool isAlreadyInState = false; for (std::vector::size_type k = 0; k < stateTotal->size(); k++) { if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) { - std::cout << (*stateTotal)[k]->toString() << std::endl; + //std::cout << (*stateTotal)[k]->toString() << std::endl; (*stateTotal)[k]->addLookahead(currentGramRule->getLookahead()); isAlreadyInState = true; break; diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 8153e92..9dee76a 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -33,7 +33,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { Symbol* currentToken = lexer.next(); input.push_back(currentToken); while (*currentToken != *EOFSymbol) { - std::cout << EOFSymbol->toString() << " " << currentToken->toString() << std::endl; + //std::cout << EOFSymbol->toString() << " " << currentToken->toString() << std::endl; currentToken = lexer.next(); if (currentToken != NULL) { input.push_back(currentToken); @@ -43,12 +43,12 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { } } - std::cout << "\n\n\nDone with Lexing\n\n\n" << std::endl; + std::cout << "\nDone with Lexing\n" << std::endl; - for (int i = 0; i < input.size(); i++) - std::cout << "|" << input[i]->toString() << "|"; - std::cout << std::endl; + // for (int i = 0; i < input.size(); i++) + // std::cout << "|" << input[i]->toString() << "|"; + // std::cout << std::endl; std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl; @@ -70,12 +70,12 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { } } - std::cout << "GSS:\n" << gss.toString() << std::endl; + // std::cout << "GSS:\n" << gss.toString() << std::endl; std::cout << "Starting parse loop" << std::endl; for (int i = 0; i < input.size(); i++) { - std::cout << "Checking if frontier " << i << " is empty" << std::endl; + // std::cout << "Checking if frontier " << i << " is empty" << std::endl; if (gss.frontierIsEmpty(i)) { std::cout << "Frontier " << i << " is empty." << std::endl; std::cout << "Failed on " << input[i]->toString() << std::endl; @@ -86,13 +86,13 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { SPPFStepNodes.clear(); while (toReduce.size() != 0) { - std::cout << "Reducing for " << i << std::endl; + //std::cout << "Reducing for " << i << std::endl; //std::cout << "GSS:\n" << gss.toString() << std::endl; reducer(i); } - std::cout << "Shifting for " << i << std::endl; + // std::cout << "Shifting for " << i << std::endl; shifter(i); - std::cout << "GSS:\n" << gss.toString() << std::endl; + //std::cout << "GSS:\n" << gss.toString() << std::endl; } std::cout << "Done with parsing loop, checking for acceptance" << std::endl; NodeTree* accState = gss.frontierGetAccState(input.size()-1); @@ -109,7 +109,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { void RNGLRParser::reducer(int i) { Reduction reduction = toReduce.front(); toReduce.pop(); - std::cout << "Doing reduction of length " << reduction.length << " from state " << reduction.from->getData() << " to symbol " << reduction.symbol->toString() << std::endl; + //std::cout << "Doing reduction of length " << reduction.length << " from state " << reduction.from->getData() << " to symbol " << reduction.symbol->toString() << std::endl; int pathLength = reduction.length > 0 ? reduction.length -1 : 0; //Get every reachable path std::vector*> >* paths = gss.getReachablePaths(reduction.from, pathLength); @@ -155,7 +155,7 @@ void RNGLRParser::reducer(int i) { gss.addEdge(toStateNode, currentReached, newLabel); if (reduction.length != 0) { //Do all non null reduction - std::cout << "Checking for non-null reductions in states that already existed" << std::endl; + //std::cout << "Checking for non-null reductions in states that already existed" << std::endl; std::vector actions = *(table.get(toState, input[i])); for (std::vector::size_type k = 0; k < actions.size(); k++) { if (actions[k]->action == ParseAction::REDUCE && !fullyReducesToNull(actions[k]->reduceRule)) { @@ -170,7 +170,7 @@ void RNGLRParser::reducer(int i) { gss.addToFrontier(i, toStateNode); gss.addEdge(toStateNode, currentReached, newLabel); - std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl; + //std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl; std::vector actions = *(table.get(toState, input[i])); for (std::vector::size_type k = 0; k < actions.size(); k++) { std::cout << "Action is " << actions[k]->toString() << std::endl; @@ -200,7 +200,7 @@ void RNGLRParser::shifter(int i) { std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl; NodeTree* shiftTo = gss.inFrontier(i+1, shift.second); if (shiftTo) { - std::cout << "State already existed, just adding edge" << std::endl; + //std::cout << "State already existed, just adding edge" << std::endl; gss.addEdge(shiftTo, shift.first, newLabel); std::vector actions = *(table.get(shift.second, input[i+1])); for (std::vector::size_type j = 0; j < actions.size(); j++) { @@ -210,7 +210,7 @@ void RNGLRParser::shifter(int i) { } } } else { - std::cout << "State did not already exist, adding" << std::endl; + //std::cout << "State did not already exist, adding" << std::endl; shiftTo = gss.newNode(shift.second); gss.addToFrontier(i+1, shiftTo); gss.addEdge(shiftTo, shift.first, newLabel); @@ -259,7 +259,7 @@ void RNGLRParser::addChildren(NodeTree* parent, std::vector* node, std::vector*>* nodes) { - std::cout << "Checking " << node->getData()->toString() << "'s family" << std::endl; + //std::cout << "Checking " << node->getData()->toString() << "'s family" << std::endl; std::vector*> children = node->getChildren(); for (std::vector*>::size_type i = 0; i < nodes->size(); i++) { bool containsOne = false; @@ -312,9 +312,8 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) { for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) { if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { symbolAlreadyInState = true; - //So now check to see if this exact rule is in this state - if (!newStates[j]->containsRule(advancedRule)) - newStates[j]->basis.push_back(advancedRule); + //Add rule to state, combining with idenical rule except lookahead if exists + newStates[j]->addRuleCombineLookahead(advancedRule); //We found a state with the same symbol, so stop searching break; } @@ -324,7 +323,36 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) { newStates.push_back(newState); } } - //Also add any completed rules as reduces in the action table + } + //Put all our new states in the set of states only if they're not already there. + bool stateAlreadyInAllStates = false; + Symbol* currStateSymbol; + for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { + stateAlreadyInAllStates = false; + currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex(); + for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) { + if (newStates[i]->basisEqualsExceptLookahead(*((*stateSets)[j]))) { + stateAlreadyInAllStates = true; + //If it does exist, we should add it as the shift/goto in the action table + (*stateSets)[j]->combineStates(*(newStates[i])); + addStateReductionsToTable((*stateSets)[j]); + table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); + break; + } + } + if (!stateAlreadyInAllStates) { + //If the state does not already exist, add it and add it as the shift/goto in the action table + stateSets->push_back(newStates[i]); + table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1)); + } + } + //Also add any completed rules as reduces in the action table + addStateReductionsToTable(state); +} + +void RNGLRParser::addStateReductionsToTable(State* state) { + std::vector* currStateTotal = state->getTotal(); + for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { //See if reduce //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); @@ -333,7 +361,7 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) { table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); //If this has an appropriate ruduction to null, get the reduce trees out } else if (reducesToNull((*currStateTotal)[i])) { - std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl; + //std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl; //If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side that is equal to //the part that we've already gone through in the rule. (so we don't pop extra off stack) //Now we use the same rule and make sure that the index location is used @@ -346,27 +374,6 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) { //table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule)); } } - //Put all our new states in the set of states only if they're not already there. - bool stateAlreadyInAllStates = false; - Symbol* currStateSymbol; - for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { - stateAlreadyInAllStates = false; - currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex(); - for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) { - if (newStates[i]->basisEquals(*((*stateSets)[j]))) { - stateAlreadyInAllStates = true; - //If it does exist, we should add it as the shift/goto in the action table - (*stateSets)[j]->addParents(newStates[i]->getParents()); - table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); - break; - } - } - if (!stateAlreadyInAllStates) { - //If the state does not already exist, add it and add it as the shift/goto in the action table - stateSets->push_back(newStates[i]); - table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1)); - } - } } bool RNGLRParser::fullyReducesToNull(ParseRule* rule) { @@ -421,8 +428,7 @@ NodeTree* RNGLRParser::getNullableParts(ParseRule* rule) { NodeTree* RNGLRParser::getNullableParts(ParseRule* rule, std::vector*> avoidList) { if (reducesToNull(rule)) { - std::cout << "Reduces to null so adding parts " << rule->toString() << std::endl; - //return new NodeTree("FAKE_PARTS_FOR_NO_CRASH", nullSymbol); + //std::cout << "Reduces to null so adding parts " << rule->toString() << std::endl; Symbol* symbol = rule->getLeftSide(); NodeTree* symbolNode = new NodeTree(symbol->getName(), symbol); if (*(rule->getAtNextIndex()) == *nullSymbol) { diff --git a/src/RegEx.cpp b/src/RegEx.cpp index 4599ac0..293acbe 100644 --- a/src/RegEx.cpp +++ b/src/RegEx.cpp @@ -263,8 +263,8 @@ int RegEx::longMatch(std::string stringToMatch) { if (!inCurrStates) currentStates.push_back(nextStates[j]); } - if (currentStates.size() != 0) - std::cout << "Matched " << i << " character: " << stringToMatch[i-1] << std::endl; + // if (currentStates.size() != 0) + // std::cout << "Matched " << i << " character: " << stringToMatch[i-1] << std::endl; nextStates.clear(); //If we can't continue matching, just return our last matched diff --git a/src/State.cpp b/src/State.cpp index ed9954b..b77caf1 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -44,12 +44,39 @@ const bool State::basisEquals(const State &other) { return false; for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) { - if (*(basis[i]) != *(other.basis[i])) + if (*(basis[i]) != (*(other.basis[i]))) return false; } return true; } +const bool State::basisEqualsExceptLookahead(const State &other) { + //return (basis == other.basis && remaining == other.remaining); + if (basis.size() != other.basis.size()) + return false; + + for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) { + if (!basis[i]->equalsExceptLookahead(*(other.basis[i]))) + return false; + } + return true; +} + +void State::combineStates(State &other) { + for (std::vector< ParseRule* >::size_type i = 0; i < other.basis.size(); i++) { + bool alreadyIn = false; + for (std::vector< ParseRule* >::size_type j = 0; j < basis.size(); j++) { + if (basis[j]->equalsExceptLookahead(*(other.basis[i]))) { + basis[j]->addLookahead(other.basis[i]->getLookahead()); + alreadyIn = true; + } + } + if (!alreadyIn) + basis.push_back(other.basis[i]); + } + addParents(other.getParents()); +} + std::vector* State::getTotal() { total.clear(); for (std::vector::size_type i = 0; i < basis.size(); i++) { @@ -68,17 +95,28 @@ std::vector* State::getRemaining() { } bool State::containsRule(ParseRule* rule) { - for (std::vector::size_type i = 0; i < basis.size(); i++) { - if (*rule == *(basis[i])) - return true; - } - for (std::vector::size_type i = 0; i < remaining.size(); i++) { - if (*rule == *(remaining[i])) + getTotal(); + for (std::vector::size_type i = 0; i < total.size(); i++) { + if (*rule == *(total[i])) { return true; + } } return false; } +void State::addRuleCombineLookahead(ParseRule* rule) { + getTotal(); + bool alreadyIn = false; + for (std::vector::size_type i = 0; i < total.size(); i++) { + if (rule->equalsExceptLookahead(*(total[i]))) { + total[i]->addLookahead(rule->getLookahead()); + alreadyIn = true; + } + } + if (!alreadyIn) + basis.push_back(rule); +} + std::string State::toString() { std::string concat = ""; concat += "State " + intToString(number) + " with " + intToString(parents.size()) + " parents:\n"; diff --git a/src/Table.cpp b/src/Table.cpp index 044a804..18587cc 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -67,7 +67,13 @@ void Table::add(int stateNum, Symbol* tranSymbol, ParseAction* action) { //std::cout << "not Null!" << std::endl; //std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << " on " << tranSymbol->toString() << std::endl; - (*(table[stateNum]))[symbolIndex]->push_back(action); + //Check to see if this action is already in the list + bool alreadyIn = false; + for (std::vector::size_type i = 0; i < (*(table[stateNum]))[symbolIndex]->size(); i++) + if (*((*((*(table[stateNum]))[symbolIndex]))[i]) == *action) + alreadyIn = true; + if (!alreadyIn) + (*(table[stateNum]))[symbolIndex]->push_back(action); } } @@ -98,8 +104,15 @@ std::vector* Table::get(int state, Symbol* token) { return NULL; } - std::cout << "Get for state: " << state << ", and Symbol: " << token->toString() << std::endl; - + //std::cout << "Get for state: " << state << ", and Symbol: " << token->toString() << std::endl; + if (state < 0 || state >= table.size()) { + std::cout << "State bad: " << state << std::endl; + return NULL; + } + if (symbolIndex < 0 || symbolIndex >= table[state]->size()) { + std::cout << "Symbol bad for this state: " << token->toString() << std::endl; + return NULL; + } std::vector* action = (*(table[state]))[symbolIndex]; //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec