diff --git a/CMakeLists.txt b/CMakeLists.txt index cfd8c03..2d3a103 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ParseRule.h b/include/ParseRule.h index d8b45fb..e2edbd7 100644 --- a/include/ParseRule.h +++ b/include/ParseRule.h @@ -18,6 +18,7 @@ class ParseRule { ~ParseRule(); bool const operator==(const ParseRule &other); + bool const operator!=(const ParseRule &other); ParseRule* clone(); @@ -26,6 +27,9 @@ class ParseRule { Symbol* getLeftSide(); std::vector getRightSide(); + Symbol* getAtNextIndex(); + Symbol* getAtIndex(); + int getRightSize(); int getIndex(); bool advancePointer(); diff --git a/include/Parser.h b/include/Parser.h index d370276..936e2b9 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -8,13 +8,13 @@ #include "ParseRule.h" #include "ParseAction.h" #include "Symbol.h" +#include "State.h" #include "StringReader.h" #include #include #include #include -#include #include class Parser { @@ -22,12 +22,10 @@ class Parser { Parser(); ~Parser(); - std::string intToString(int theInt); - void loadGrammer(std::string grammerInputString); void createStateSet(); - void closure(std::vector* state); - void addState(std::vector< std::vector* >* stateSets, std::vector* state, Symbol*); + void closure(State* state); + void addState(std::vector< State* >* stateSets, State* state, Symbol*); std::string stateSetToString(); int gotoTable(int state, Symbol* token); ParseAction* actionTable(int state, Symbol* token); @@ -40,7 +38,7 @@ class Parser { std::map symbols; std::vector loadedGrammer; - std::vector< std::vector* > stateSets; + std::vector< State* > stateSets; std::stack stateStack; std::stack symbolStack; diff --git a/include/State.h b/include/State.h new file mode 100644 index 0000000..ece15f2 --- /dev/null +++ b/include/State.h @@ -0,0 +1,36 @@ +#ifndef STATE_H +#define STATE_H + +#ifndef NULL +#define NULL 0 +#endif + +#include "ParseRule.h" + +#include +#include +#include +#include + +class State { + public: + State(int number, ParseRule* basis); + ~State(); + std::string intToString(int theInt); + bool const operator==(const State &other); + bool const operator!=(const State &other); + std::vector* getBasis(); + std::vector* getRemaining(); + std::vector* getTotal(); + bool containsRule(ParseRule* rule); + std::string toString(); + + + std::vector basis; + std::vector remaining; + private: + std::vector total; + int number; +}; + +#endif \ No newline at end of file diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index b54cb9f..1e2d1a4 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -19,6 +19,10 @@ const bool ParseRule::operator==(const ParseRule &other) { return( leftHandle == other.leftHandle && rightSide == other.rightSide && pointerIndex == other.pointerIndex ); } +const bool ParseRule::operator!=(const ParseRule &other) { + return !(this->operator==(other)); +} + ParseRule* ParseRule::clone() { return( new ParseRule(leftHandle, pointerIndex, rightSide) ); } @@ -39,8 +43,24 @@ std::vector ParseRule::getRightSide() { return rightSide; } +Symbol* ParseRule::getAtNextIndex() { + if (pointerIndex >= rightSide.size()) + return NULL; + return rightSide[pointerIndex]; +} + +Symbol* ParseRule::getAtIndex() { + if (pointerIndex < 1) + return NULL; + return rightSide[pointerIndex-1]; +} + +int ParseRule::getRightSize() { + return rightSide.size(); +} + int ParseRule::getIndex() { - return pointerIndex; + return pointerIndex-1; } bool ParseRule::advancePointer() { diff --git a/src/Parser.cpp b/src/Parser.cpp index dbeecb5..f86a77c 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -8,12 +8,6 @@ Parser::~Parser() { } -std::string Parser::intToString(int theInt) { - std::stringstream converter; - converter << theInt; - return converter.str(); -} - Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) { Symbol* symbol; if (symbols.find(symbolString) == symbols.end()) { @@ -59,95 +53,98 @@ void Parser::loadGrammer(std::string grammerInputString) { void Parser::createStateSet() { std::cout << "Begining creation of stateSet" << std::endl; - stateSets.push_back( new std::vector ); - stateSets[0]->push_back(loadedGrammer[0]); + stateSets.push_back( new State(0, loadedGrammer[0]) ); std::cout << "Begining for main set for loop" << std::endl; - for (std::vector< std::vector* >::size_type i = 0; i < stateSets.size(); i++) { - std::cout << "calling closure" << std::endl; + for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) { + std::cout << "calling closure on " << stateSets[i]->toString() << std::endl; closure(stateSets[i]); std::cout << "finished closure" << std::endl; std::cout << "Starting inner for loop that adds states" << std::endl; - for (std::vector::size_type j = 0; j < stateSets[i]->size(); j++) { + std::vector* allRules = stateSets[i]->getTotal(); + for (std::vector::size_type j = 0; j < allRules->size(); j++) { std::cout << "about to call addState" << std::endl; - addState(&stateSets, stateSets[i], (*stateSets[i])[j]->getRightSide()[(*stateSets[i])[j]->getIndex()]); - std::cout << "finished addState" << std::endl; + addState(&stateSets, stateSets[i], (*allRules)[j]->getAtNextIndex()); //Closure will be called in the outer loop } } } -void Parser::closure(std::vector* state) { +void Parser::closure(State* state) { //Add all the applicable rules. - for (std::vector::size_type i = 0; i < state->size(); i++) { + std::cout << "Closure on " << state->toString() << " is" << std::endl; + for (std::vector::size_type i = 0; i < state->getTotal()->size(); i++) { for (std::vector::size_type j = 0; j < loadedGrammer.size(); j++) { - if ((*state)[i]->getRightSide()[(*state)[i]->getIndex()] == loadedGrammer[j]->getLeftSide()) { + //If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side + if ((*state->getTotal())[i]->getAtNextIndex() != NULL && *((*state->getTotal())[i]->getAtNextIndex()) == *(loadedGrammer[j]->getLeftSide())) { + std::cout << (*state->getTotal())[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl; //Check to make sure not already in bool isAlreadyInState = false; - for (std::vector::size_type k = 0; k < state->size(); k++) { - if ((*state)[k] == loadedGrammer[j]) { + for (std::vector::size_type k = 0; k < state->getTotal()->size(); k++) { + if ((*state->getTotal())[k] == loadedGrammer[j]) { isAlreadyInState = true; break; } } if (!isAlreadyInState) - state->push_back(loadedGrammer[j]); + state->remaining.push_back(loadedGrammer[j]); } } } + std::cout << state->toString() << std::endl; } //Adds state if it doesn't already exist. -void Parser::addState(std::vector< std::vector* >* stateSets, std::vector* state, Symbol* symbol) { - std::vector* > newStates; +void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* symbol) { + std::vector< State* > newStates; //For each rule in the state we already have - for (std::vector::size_type i = 0; i < state->size(); i++) { + for (std::vector::size_type i = 0; i < state->getTotal()->size(); i++) { //Clone the current rule - ParseRule* advancedRule = (*state)[i]->clone(); + ParseRule* advancedRule = (*state->getTotal())[i]->clone(); //Try to advance the pointer if (advancedRule->advancePointer()) { - //If sucessful, check to see if this the advanced symbol is the basis for any of our new states + //Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state + + //So search our new states to see if any of them use this advanced symbol as a base. + //If so, add this rule to them. + //If not, create it. bool symbolAlreadyInState = false; - for (std::vector* >::size_type j = 0; j < newStates.size(); j++) { - if ((*newStates[j])[0]->getRightSide()[(*newStates[j])[0]->getIndex()] == advancedRule->getRightSide()[advancedRule->getIndex()]) { + for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) { + if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { symbolAlreadyInState = true; //So now check to see if this exact rule is in this state - bool ruleAlreadyInState = false; - for (std::vector::size_type k = 0; k < newStates[j]->size(); k++) { - if (*(*newStates[j])[k] == (*advancedRule) ) { - ruleAlreadyInState = true; - break; - } - } - if (!ruleAlreadyInState) { - newStates[j]->push_back(advancedRule); + if (!newStates[j]->containsRule(advancedRule)) { + newStates[j]->basis.push_back(advancedRule); } //We found a state with the same symbol, so stop searching break; } } if (!symbolAlreadyInState) { - std::vector* newState = new std::vector; - newState->push_back(advancedRule); + State* newState = new State(stateSets->size()+newStates.size(),advancedRule); newStates.push_back(newState); } } } - //Put all our new states in the set of states - for (std::vector< std::vector * >::size_type i = 0; i < newStates.size(); i++) { - stateSets->push_back(newStates[i]); + //Put all our new states in the set of states only if they're not already there. + bool stateAlreadyInAllStates = false; + for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { + for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) { + if (*(newStates[i]) == *((*stateSets)[j])) { + stateAlreadyInAllStates = true; + //std::cout << newStates[i]->toString() << " is equal to\n" << (*stateSets)[j]->toString() << std::endl; + } + } + if (!stateAlreadyInAllStates) { + stateSets->push_back(newStates[i]); + stateAlreadyInAllStates = false; + } } } std::string Parser::stateSetToString() { std::string concat = ""; - int currentNum = 0; - for (std::vector< std::vector *>::size_type i = 0; i < stateSets.size(); i++) { - concat += "State " + intToString(currentNum) + ":\n"; - for (std::vector::size_type j = 0; j < stateSets[i]->size(); j++) { - concat += "\t" + (*stateSets[i])[j]->toString() + "\n"; - } - concat += "\n"; - currentNum++; + for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) { + concat += stateSets[i]->toString(); } return concat; } diff --git a/src/State.cpp b/src/State.cpp new file mode 100644 index 0000000..c3c59f2 --- /dev/null +++ b/src/State.cpp @@ -0,0 +1,74 @@ +#include "State.h" + +State::State(int number, ParseRule* basis) { + this->number = number; + this->basis.push_back(basis); +} + +State::~State() { + +} + +const bool State::operator==(const State &other) { + //return (basis == other.basis && remaining == other.remaining); + if (basis.size() != other.basis.size()) + return false; + + for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) { + if (*(basis[i]) != *(other.basis[i])) + return false; + } + + if (remaining.size() != other.remaining.size()) + return false; + for (std::vector< ParseRule* >::size_type i = 0; i < remaining.size(); i++) { + if (remaining[i] != other.remaining[i]) + return false; + } + return true; +} + +const bool State::operator!=(const State &other) { + return !(this->operator==(other)); +} + +std::vector* State::getTotal() { + total.clear(); + for (std::vector::size_type i = 0; i < basis.size(); i++) { + total.push_back(basis[i]); + } + for (std::vector::size_type i = 0; i < remaining.size(); i++) { + total.push_back(remaining[i]); + } + return(&total); +} + +bool State::containsRule(ParseRule* rule) { + for (std::vector::size_type i = 0; i < basis.size(); i++) { + if (*rule == *(basis[i])) + return true; + } + for (std::vector::size_type i = 0; i < remaining.size(); i++) { + if (*rule == *(remaining[i])) + return true; + } + return false; +} + +std::string State::intToString(int theInt) { + std::stringstream converter; + converter << theInt; + return converter.str(); +} + +std::string State::toString() { + std::string concat = ""; + concat += "State " + intToString(number) + ":\n"; + for (std::vector::size_type j = 0; j < basis.size(); j++) { + concat += "\t" + basis[j]->toString() + "\n"; + } + for (std::vector::size_type j = 0; j < remaining.size(); j++) { + concat += "\t+\t" + remaining[j]->toString() + "\n"; + } + return concat; +} \ No newline at end of file