From 949dbc532aca6182b9e6ce66752c98c5c4c5eb04 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 4 Jun 2013 19:50:16 -0400 Subject: [PATCH] Fixed lots of bugs, added much more sane and efficient ParseAction table instead of recalculating every time. Fixed lots of bugs and inefficencies. Some temporary hacks in the table, mostly having to do with not having an EOF Symbol yet. --- CMakeLists.txt | 2 +- include/ParseAction.h | 3 + include/Parser.h | 13 ++- include/State.h | 2 +- include/util.h | 9 ++ main.cpp | 1 + src/ParseAction.cpp | 20 +++- src/Parser.cpp | 209 ++++++++++++++++++++++++++++-------------- src/State.cpp | 12 +-- src/util.cpp | 7 ++ 10 files changed, 192 insertions(+), 86 deletions(-) create mode 100644 include/util.h create mode 100644 src/util.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d3a103..f215870 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ParseAction.h b/include/ParseAction.h index c421b44..e7103f0 100644 --- a/include/ParseAction.h +++ b/include/ParseAction.h @@ -5,6 +5,7 @@ #define NULL 0 #endif +#include "util.h" #include "ParseRule.h" #include @@ -17,6 +18,8 @@ class ParseAction { ParseAction(ActionType action, ParseRule* reduceRule); ParseAction(ActionType action, int shiftState); ~ParseAction(); + bool const operator==(const ParseAction &other); + bool const operator!=(const ParseAction &other); std::string toString(); static std::string actionToString(ActionType action); diff --git a/include/Parser.h b/include/Parser.h index ce3f6ff..b70c99d 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -5,6 +5,7 @@ #define NULL 0 #endif +#include "util.h" #include "ParseRule.h" #include "ParseAction.h" #include "Symbol.h" @@ -27,14 +28,17 @@ class Parser { void loadGrammer(std::string grammerInputString); void createStateSet(); void closure(State* state); - void addState(std::vector< State* >* stateSets, State* state, Symbol*); + void addStates(std::vector< State* >* stateSets, State* state); std::string stateSetToString(); - int gotoTable(int state, Symbol* token); - ParseAction* actionTable(int state, Symbol* token); + void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action); + ParseAction* getTable(int state, Symbol* token); NodeTree* parseInput(std::string inputString); std::string grammerToString(); std::string grammerToDOT(); + + std::string tableToString(); + private: StringReader reader; std::map symbols; @@ -42,7 +46,8 @@ class Parser { std::vector< State* > stateSets; - //std::vector< std::vector > + std::vector< std::vector* > table; + std::vector symbolIndexVec; std::stack stateStack; std::stack symbolStack; diff --git a/include/State.h b/include/State.h index ece15f2..8e62638 100644 --- a/include/State.h +++ b/include/State.h @@ -5,6 +5,7 @@ #define NULL 0 #endif +#include "util.h" #include "ParseRule.h" #include @@ -16,7 +17,6 @@ class State { public: State(int number, ParseRule* basis); ~State(); - std::string intToString(int theInt); bool const operator==(const State &other); bool const operator!=(const State &other); std::vector* getBasis(); diff --git a/include/util.h b/include/util.h new file mode 100644 index 0000000..0a79c2d --- /dev/null +++ b/include/util.h @@ -0,0 +1,9 @@ +#ifndef UTIL_H +#define UTIL_H + +#include +#include + +std::string intToString(int theInt); + +#endif \ No newline at end of file diff --git a/main.cpp b/main.cpp index f49afab..5f992b7 100644 --- a/main.cpp +++ b/main.cpp @@ -58,6 +58,7 @@ int main(int argc, char* argv[]) { //std::cout << "Doing stateSetToString from Main" << std::endl; std::cout << parser.stateSetToString() << std::endl; //std::cout << "finished stateSetToString from Main" << std::endl; + std::cout << parser.tableToString() << std::endl; std::cout << grammerInputFileString << std::endl; std::cout << parser.grammerToString() << std::endl; diff --git a/src/ParseAction.cpp b/src/ParseAction.cpp index 6296d58..dd9193a 100644 --- a/src/ParseAction.cpp +++ b/src/ParseAction.cpp @@ -22,6 +22,15 @@ ParseAction::~ParseAction() { } + +const bool ParseAction::operator==(const ParseAction &other) { + return( action == other.action && ( reduceRule == other.reduceRule || *reduceRule == *(other.reduceRule) ) && shiftState == other.shiftState); +} + +const bool ParseAction::operator!=(const ParseAction &other) { + return !(this->operator==(other)); +} + std::string ParseAction::actionToString(ActionType action) { switch (action) { case REDUCE: @@ -40,10 +49,11 @@ std::string ParseAction::actionToString(ActionType action) { } std::string ParseAction::toString() { - std::string outputString = actionToString(action); - if (reduceRule) - outputString += " " + reduceRule->toString(); - if (shiftState) - outputString += " " + shiftState; + std::string outputString = ""; + outputString += actionToString(action); + if (reduceRule != NULL) + outputString += " " + reduceRule->toString(); + if (shiftState != -1) + outputString += " " + intToString(shiftState); return(outputString); } \ No newline at end of file diff --git a/src/Parser.cpp b/src/Parser.cpp index caa506b..25ea7ee 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -56,16 +56,10 @@ void Parser::createStateSet() { stateSets.push_back( new State(0, loadedGrammer[0]) ); //std::cout << "Begining for main set for loop" << std::endl; for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) { - //std::cout << "calling closure on " << stateSets[i]->toString() << std::endl; + //closure closure(stateSets[i]); - //std::cout << "finished closure" << std::endl; - //std::cout << "Starting inner for loop that adds states" << std::endl; - std::vector* allRules = stateSets[i]->getTotal(); - for (std::vector::size_type j = 0; j < allRules->size(); j++) { - //std::cout << "about to call addState" << std::endl; - addState(&stateSets, stateSets[i], (*allRules)[j]->getAtNextIndex()); - //Closure will be called in the outer loop - } + //Add the new states + addStates(&stateSets, stateSets[i]); } } @@ -94,13 +88,14 @@ void Parser::closure(State* state) { } //Adds state if it doesn't already exist. -void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* symbol) { +void Parser::addStates(std::vector< State* >* stateSets, State* state) { std::vector< State* > newStates; //For each rule in the state we already have - for (std::vector::size_type i = 0; i < state->getTotal()->size(); i++) { + std::vector* currStateTotal = state->getTotal(); + for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { //Clone the current rule - ParseRule* advancedRule = (*state->getTotal())[i]->clone(); - //Try to advance the pointer + ParseRule* advancedRule = (*currStateTotal)[i]->clone(); + //Try to advance the pointer, if sucessful see if it is the correct next symbol if (advancedRule->advancePointer()) { //Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state @@ -112,9 +107,8 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { symbolAlreadyInState = true; //So now check to see if this exact rule is in this state - if (!newStates[j]->containsRule(advancedRule)) { + if (!newStates[j]->containsRule(advancedRule)) newStates[j]->basis.push_back(advancedRule); - } //We found a state with the same symbol, so stop searching break; } @@ -124,19 +118,36 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy newStates.push_back(newState); } } + //Also add any completed rules as reduces in the action table + //See if reduce + //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... + if ((*currStateTotal)[i]->isAtEnd()) { + std::cout << (*currStateTotal)[i]->toString() << " is at end, adding reduce to table" << std::endl; + //This should iterate through the follow set, but right now is LR(0), so all symbols + for (std::vector::size_type j = 0; j < symbolIndexVec.size(); j++) + addToTable(state, symbolIndexVec[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + } else { + std::cout << (*currStateTotal)[i]->toString() << " is NOT at end" << std::endl; + } } //Put all our new states in the set of states only if they're not already there. bool stateAlreadyInAllStates = false; + Symbol* currStateSymbol; for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { + currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex(); for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) { if (*(newStates[i]) == *((*stateSets)[j])) { stateAlreadyInAllStates = true; - //std::cout << newStates[i]->toString() << " is equal to\n" << (*stateSets)[j]->toString() << std::endl; + //If it does exist, we should add it as the shift/goto in the action table + addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); + break; } } if (!stateAlreadyInAllStates) { stateSets->push_back(newStates[i]); stateAlreadyInAllStates = false; + //If the state does not already exist, add it and add it as the shift/goto in the action table + addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1)); } } } @@ -149,62 +160,116 @@ std::string Parser::stateSetToString() { return concat; } -int Parser::gotoTable(int state, Symbol* token) { - std::vector allInState = *(stateSets[state]->getTotal()); - ParseRule* currentRule; - for (std::vector::size_type i = 0; i < allInState.size(); i++) { - currentRule = allInState[i]; - if (*(currentRule->getAtNextIndex()) == *token) { - ParseRule* advancedCurrent = currentRule->clone(); - advancedCurrent->advancePointer(); - for (std::vector::size_type j = 0; j < stateSets.size(); j++) { - for (std::vector::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) { - if ( *(stateSets[j]->basis[k]) == *advancedCurrent) - return(j); - } - } +void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) { + + //find what state num the from state is + int stateNum = -1; + for (std::vector::size_type i = 0; i < stateSets.size(); i++) { + if (*(stateSets[i]) == *fromState) { + stateNum = i; + break; } } - return(-1); + + //std::cout << "stateNum is " << stateNum << std::endl; + + //If state not in table, add up to and it. + //std::cout << "table size is " << table.size() <= table.size()) { + //std::cout << "Pushing back table" << std::endl; + table.push_back(new std::vector); + } + + //find out what index this symbol is on + int symbolIndex = -1; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( *(symbolIndexVec[i]) == *tranSymbol ) { + //Has been found + symbolIndex = i; + break; + } + } + //std::cout << "symbolIndex is " << symbolIndex << std::endl; + + //If we've never done this symbol, add it + if (symbolIndex < 0) { + // std::cout << "pushing back symbolIndexVec" <toString() << std::endl; + + //std::cout << table[stateNum] << " "; + while (symbolIndex >= table[stateNum]->size()) { + table[stateNum]->push_back(NULL); + } + + //If this table slot is empty + //std::cout << "table[stateNum] is " << table[stateNum] << std::endl; + //std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl; + + if ( (*(table[stateNum]))[symbolIndex] == NULL ) { + std::cout << "Null, adding " << action->toString() << std::endl; + (*(table[stateNum]))[symbolIndex] = action; + } + //If the slot is not empty and does not contain ourself, then it is a conflict + else if ( *((*(table[stateNum]))[symbolIndex]) != *action) { + std::cout << "not Null!" << std::endl; + std::cout << "Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << std::endl; + //Don't overwrite + //(*(table[stateNum]))[symbolIndex] = action; + } } -ParseAction* Parser::actionTable(int state, Symbol* token) { - std::vector* allStateRules = stateSets[state]->getTotal(); - ParseRule* currentRule; +std::string Parser::tableToString() { + std::string concat = ""; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) + concat += "\t" + symbolIndexVec[i]->toString(); + concat += "\n"; - //Get the completed Goal rule for comparision to see if we need to accept - ParseRule* completedGoal = stateSets[0]->basis[0]->clone(); - while (completedGoal->advancePointer()) {} - - for (std::vector::size_type i = 0; i < allStateRules->size(); i++) { - currentRule = (*allStateRules)[i]; - - //If the current rule in the state is completed, then do a reduce action - if (currentRule->isAtEnd()) { - //But first, if our advanced rule is equal to the completedGoal, we accept - if (*currentRule == *completedGoal) - return new ParseAction(ParseAction::ACCEPT); - return new ParseAction(ParseAction::REDUCE, currentRule); + for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) { + concat += intToString(i) + "\t"; + for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) { + if ( (*(table[i]))[j] != NULL) + concat += (*(table[i]))[j]->toString() + "\t"; + else + concat += "NULL\t"; } - - //If the current rule in the state is not completed, see if it has the next correct token - //std::cout << currentRule->getAtNextIndex()->toString() << " comp to " << token->toString() << std::endl; - if ( *(currentRule->getAtNextIndex()) == *token){ - //If it does have the correct next token, then find the state that has this rule advanced as basis, that is the state we shift to - //Goes to n^2 here, really need that table - ParseRule* advancedCurrent = currentRule->clone(); - advancedCurrent->advancePointer(); - - for (std::vector::size_type j = 0; j < stateSets.size(); j++) { - for (std::vector::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) { - if ( *(stateSets[j]->basis[k]) == *advancedCurrent) - return new ParseAction(ParseAction::SHIFT, j); - } - } - } - + concat += "\n"; } - return new ParseAction(ParseAction::REJECT); + return(concat); +} + +ParseAction* Parser::getTable(int state, Symbol* token) { + int symbolIndex = -1; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( *(symbolIndexVec[i]) == *token) { + symbolIndex = i; + break; + } + } + + //This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec + //(This assumes singular goal assignment, a simplification for now) + if (state == 1 && symbolIndex == -1) + return(new ParseAction(ParseAction::ACCEPT)); + + //Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol + if (symbolIndex == -1) + symbolIndex = 0; + + //If ourside the symbol range of this state (same as NULL), reject + if ( symbolIndex >= table[state]->size() ) + return(new ParseAction(ParseAction::REJECT)); + + ParseAction* action = (*(table[state]))[symbolIndex]; + //If null, reject. (this is a space with no other action) + if (action == NULL) + return(new ParseAction(ParseAction::REJECT)); + + //Otherwise, we have something, so return it + return (action); } NodeTree* Parser::parseInput(std::string inputString) { @@ -217,10 +282,13 @@ NodeTree* Parser::parseInput(std::string inputString) { symbolStack.push(new Symbol("INVALID", false)); while (true) { - action = actionTable(stateStack.top(), token); + std::cout << "In state: " << intToString(stateStack.top()) << std::endl; + action = getTable(stateStack.top(), token); switch (action->action) { case ParseAction::REDUCE: { + std::cout << "Reduce by " << action->reduceRule->toString() << std::endl; + int rightSideLength = action->reduceRule->getRightSide().size(); //Keep track of symbols popped for parse tree std::vector poppedSymbols; @@ -234,15 +302,18 @@ NodeTree* Parser::parseInput(std::string inputString) { Symbol* newSymbol = action->reduceRule->getLeftSide()->clone(); newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols)); symbolStack.push(newSymbol); - stateStack.push(gotoTable(stateStack.top(), symbolStack.top())); - std::cout << "Reduce by " << action->reduceRule->toString() << std::endl; + std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; + stateStack.push(getTable(stateStack.top(), symbolStack.top())->shiftState); + std::cout << "Reduced, now condition is" << std::endl; + std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; break; } case ParseAction::SHIFT: + std::cout << "Shift " << token->toString() << std::endl; + symbolStack.push(token); token = new Symbol("\""+inputReader.word()+"\"", true); stateStack.push(action->shiftState); - std::cout << "Shift " << symbolStack.top()->toString() << std::endl; break; case ParseAction::ACCEPT: std::cout << "ACCEPTED!" << std::endl; diff --git a/src/State.cpp b/src/State.cpp index c3c59f2..b9e82f5 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -42,6 +42,12 @@ std::vector* State::getTotal() { } return(&total); } +std::vector* State::getBasis() { + return &basis; +} +std::vector* State::getRemaining() { + return &remaining; +} bool State::containsRule(ParseRule* rule) { for (std::vector::size_type i = 0; i < basis.size(); i++) { @@ -55,12 +61,6 @@ bool State::containsRule(ParseRule* rule) { return false; } -std::string State::intToString(int theInt) { - std::stringstream converter; - converter << theInt; - return converter.str(); -} - std::string State::toString() { std::string concat = ""; concat += "State " + intToString(number) + ":\n"; diff --git a/src/util.cpp b/src/util.cpp new file mode 100644 index 0000000..2b69251 --- /dev/null +++ b/src/util.cpp @@ -0,0 +1,7 @@ +#include "util.h" + +std::string intToString(int theInt) { + std::stringstream converter; + converter << theInt; + return converter.str(); +} \ No newline at end of file