diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d3a103..f215870 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ParseAction.h b/include/ParseAction.h index c421b44..e7103f0 100644 --- a/include/ParseAction.h +++ b/include/ParseAction.h @@ -5,6 +5,7 @@ #define NULL 0 #endif +#include "util.h" #include "ParseRule.h" #include @@ -17,6 +18,8 @@ class ParseAction { ParseAction(ActionType action, ParseRule* reduceRule); ParseAction(ActionType action, int shiftState); ~ParseAction(); + bool const operator==(const ParseAction &other); + bool const operator!=(const ParseAction &other); std::string toString(); static std::string actionToString(ActionType action); diff --git a/include/Parser.h b/include/Parser.h index ce3f6ff..b70c99d 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -5,6 +5,7 @@ #define NULL 0 #endif +#include "util.h" #include "ParseRule.h" #include "ParseAction.h" #include "Symbol.h" @@ -27,14 +28,17 @@ class Parser { void loadGrammer(std::string grammerInputString); void createStateSet(); void closure(State* state); - void addState(std::vector< State* >* stateSets, State* state, Symbol*); + void addStates(std::vector< State* >* stateSets, State* state); std::string stateSetToString(); - int gotoTable(int state, Symbol* token); - ParseAction* actionTable(int state, Symbol* token); + void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action); + ParseAction* getTable(int state, Symbol* token); NodeTree* parseInput(std::string inputString); std::string grammerToString(); std::string grammerToDOT(); + + std::string tableToString(); + private: StringReader reader; std::map symbols; @@ -42,7 +46,8 @@ class Parser { std::vector< State* > stateSets; - //std::vector< std::vector > + std::vector< std::vector* > table; + std::vector symbolIndexVec; std::stack stateStack; std::stack symbolStack; diff --git a/include/State.h b/include/State.h index ece15f2..8e62638 100644 --- a/include/State.h +++ b/include/State.h @@ -5,6 +5,7 @@ #define NULL 0 #endif +#include "util.h" #include "ParseRule.h" #include @@ -16,7 +17,6 @@ class State { public: State(int number, ParseRule* basis); ~State(); - std::string intToString(int theInt); bool const operator==(const State &other); bool const operator!=(const State &other); std::vector* getBasis(); diff --git a/include/util.h b/include/util.h new file mode 100644 index 0000000..0a79c2d --- /dev/null +++ b/include/util.h @@ -0,0 +1,9 @@ +#ifndef UTIL_H +#define UTIL_H + +#include +#include + +std::string intToString(int theInt); + +#endif \ No newline at end of file diff --git a/main.cpp b/main.cpp index f49afab..5f992b7 100644 --- a/main.cpp +++ b/main.cpp @@ -58,6 +58,7 @@ int main(int argc, char* argv[]) { //std::cout << "Doing stateSetToString from Main" << std::endl; std::cout << parser.stateSetToString() << std::endl; //std::cout << "finished stateSetToString from Main" << std::endl; + std::cout << parser.tableToString() << std::endl; std::cout << grammerInputFileString << std::endl; std::cout << parser.grammerToString() << std::endl; diff --git a/src/ParseAction.cpp b/src/ParseAction.cpp index 6296d58..dd9193a 100644 --- a/src/ParseAction.cpp +++ b/src/ParseAction.cpp @@ -22,6 +22,15 @@ ParseAction::~ParseAction() { } + +const bool ParseAction::operator==(const ParseAction &other) { + return( action == other.action && ( reduceRule == other.reduceRule || *reduceRule == *(other.reduceRule) ) && shiftState == other.shiftState); +} + +const bool ParseAction::operator!=(const ParseAction &other) { + return !(this->operator==(other)); +} + std::string ParseAction::actionToString(ActionType action) { switch (action) { case REDUCE: @@ -40,10 +49,11 @@ std::string ParseAction::actionToString(ActionType action) { } std::string ParseAction::toString() { - std::string outputString = actionToString(action); - if (reduceRule) - outputString += " " + reduceRule->toString(); - if (shiftState) - outputString += " " + shiftState; + std::string outputString = ""; + outputString += actionToString(action); + if (reduceRule != NULL) + outputString += " " + reduceRule->toString(); + if (shiftState != -1) + outputString += " " + intToString(shiftState); return(outputString); } \ No newline at end of file diff --git a/src/Parser.cpp b/src/Parser.cpp index caa506b..25ea7ee 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -56,16 +56,10 @@ void Parser::createStateSet() { stateSets.push_back( new State(0, loadedGrammer[0]) ); //std::cout << "Begining for main set for loop" << std::endl; for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) { - //std::cout << "calling closure on " << stateSets[i]->toString() << std::endl; + //closure closure(stateSets[i]); - //std::cout << "finished closure" << std::endl; - //std::cout << "Starting inner for loop that adds states" << std::endl; - std::vector* allRules = stateSets[i]->getTotal(); - for (std::vector::size_type j = 0; j < allRules->size(); j++) { - //std::cout << "about to call addState" << std::endl; - addState(&stateSets, stateSets[i], (*allRules)[j]->getAtNextIndex()); - //Closure will be called in the outer loop - } + //Add the new states + addStates(&stateSets, stateSets[i]); } } @@ -94,13 +88,14 @@ void Parser::closure(State* state) { } //Adds state if it doesn't already exist. -void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* symbol) { +void Parser::addStates(std::vector< State* >* stateSets, State* state) { std::vector< State* > newStates; //For each rule in the state we already have - for (std::vector::size_type i = 0; i < state->getTotal()->size(); i++) { + std::vector* currStateTotal = state->getTotal(); + for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { //Clone the current rule - ParseRule* advancedRule = (*state->getTotal())[i]->clone(); - //Try to advance the pointer + ParseRule* advancedRule = (*currStateTotal)[i]->clone(); + //Try to advance the pointer, if sucessful see if it is the correct next symbol if (advancedRule->advancePointer()) { //Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state @@ -112,9 +107,8 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { symbolAlreadyInState = true; //So now check to see if this exact rule is in this state - if (!newStates[j]->containsRule(advancedRule)) { + if (!newStates[j]->containsRule(advancedRule)) newStates[j]->basis.push_back(advancedRule); - } //We found a state with the same symbol, so stop searching break; } @@ -124,19 +118,36 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy newStates.push_back(newState); } } + //Also add any completed rules as reduces in the action table + //See if reduce + //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... + if ((*currStateTotal)[i]->isAtEnd()) { + std::cout << (*currStateTotal)[i]->toString() << " is at end, adding reduce to table" << std::endl; + //This should iterate through the follow set, but right now is LR(0), so all symbols + for (std::vector::size_type j = 0; j < symbolIndexVec.size(); j++) + addToTable(state, symbolIndexVec[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + } else { + std::cout << (*currStateTotal)[i]->toString() << " is NOT at end" << std::endl; + } } //Put all our new states in the set of states only if they're not already there. bool stateAlreadyInAllStates = false; + Symbol* currStateSymbol; for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { + currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex(); for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) { if (*(newStates[i]) == *((*stateSets)[j])) { stateAlreadyInAllStates = true; - //std::cout << newStates[i]->toString() << " is equal to\n" << (*stateSets)[j]->toString() << std::endl; + //If it does exist, we should add it as the shift/goto in the action table + addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); + break; } } if (!stateAlreadyInAllStates) { stateSets->push_back(newStates[i]); stateAlreadyInAllStates = false; + //If the state does not already exist, add it and add it as the shift/goto in the action table + addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1)); } } } @@ -149,62 +160,116 @@ std::string Parser::stateSetToString() { return concat; } -int Parser::gotoTable(int state, Symbol* token) { - std::vector allInState = *(stateSets[state]->getTotal()); - ParseRule* currentRule; - for (std::vector::size_type i = 0; i < allInState.size(); i++) { - currentRule = allInState[i]; - if (*(currentRule->getAtNextIndex()) == *token) { - ParseRule* advancedCurrent = currentRule->clone(); - advancedCurrent->advancePointer(); - for (std::vector::size_type j = 0; j < stateSets.size(); j++) { - for (std::vector::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) { - if ( *(stateSets[j]->basis[k]) == *advancedCurrent) - return(j); - } - } +void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) { + + //find what state num the from state is + int stateNum = -1; + for (std::vector::size_type i = 0; i < stateSets.size(); i++) { + if (*(stateSets[i]) == *fromState) { + stateNum = i; + break; } } - return(-1); + + //std::cout << "stateNum is " << stateNum << std::endl; + + //If state not in table, add up to and it. + //std::cout << "table size is " << table.size() <= table.size()) { + //std::cout << "Pushing back table" << std::endl; + table.push_back(new std::vector); + } + + //find out what index this symbol is on + int symbolIndex = -1; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( *(symbolIndexVec[i]) == *tranSymbol ) { + //Has been found + symbolIndex = i; + break; + } + } + //std::cout << "symbolIndex is " << symbolIndex << std::endl; + + //If we've never done this symbol, add it + if (symbolIndex < 0) { + // std::cout << "pushing back symbolIndexVec" <toString() << std::endl; + + //std::cout << table[stateNum] << " "; + while (symbolIndex >= table[stateNum]->size()) { + table[stateNum]->push_back(NULL); + } + + //If this table slot is empty + //std::cout << "table[stateNum] is " << table[stateNum] << std::endl; + //std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl; + + if ( (*(table[stateNum]))[symbolIndex] == NULL ) { + std::cout << "Null, adding " << action->toString() << std::endl; + (*(table[stateNum]))[symbolIndex] = action; + } + //If the slot is not empty and does not contain ourself, then it is a conflict + else if ( *((*(table[stateNum]))[symbolIndex]) != *action) { + std::cout << "not Null!" << std::endl; + std::cout << "Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << std::endl; + //Don't overwrite + //(*(table[stateNum]))[symbolIndex] = action; + } } -ParseAction* Parser::actionTable(int state, Symbol* token) { - std::vector* allStateRules = stateSets[state]->getTotal(); - ParseRule* currentRule; +std::string Parser::tableToString() { + std::string concat = ""; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) + concat += "\t" + symbolIndexVec[i]->toString(); + concat += "\n"; - //Get the completed Goal rule for comparision to see if we need to accept - ParseRule* completedGoal = stateSets[0]->basis[0]->clone(); - while (completedGoal->advancePointer()) {} - - for (std::vector::size_type i = 0; i < allStateRules->size(); i++) { - currentRule = (*allStateRules)[i]; - - //If the current rule in the state is completed, then do a reduce action - if (currentRule->isAtEnd()) { - //But first, if our advanced rule is equal to the completedGoal, we accept - if (*currentRule == *completedGoal) - return new ParseAction(ParseAction::ACCEPT); - return new ParseAction(ParseAction::REDUCE, currentRule); + for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) { + concat += intToString(i) + "\t"; + for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) { + if ( (*(table[i]))[j] != NULL) + concat += (*(table[i]))[j]->toString() + "\t"; + else + concat += "NULL\t"; } - - //If the current rule in the state is not completed, see if it has the next correct token - //std::cout << currentRule->getAtNextIndex()->toString() << " comp to " << token->toString() << std::endl; - if ( *(currentRule->getAtNextIndex()) == *token){ - //If it does have the correct next token, then find the state that has this rule advanced as basis, that is the state we shift to - //Goes to n^2 here, really need that table - ParseRule* advancedCurrent = currentRule->clone(); - advancedCurrent->advancePointer(); - - for (std::vector::size_type j = 0; j < stateSets.size(); j++) { - for (std::vector::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) { - if ( *(stateSets[j]->basis[k]) == *advancedCurrent) - return new ParseAction(ParseAction::SHIFT, j); - } - } - } - + concat += "\n"; } - return new ParseAction(ParseAction::REJECT); + return(concat); +} + +ParseAction* Parser::getTable(int state, Symbol* token) { + int symbolIndex = -1; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( *(symbolIndexVec[i]) == *token) { + symbolIndex = i; + break; + } + } + + //This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec + //(This assumes singular goal assignment, a simplification for now) + if (state == 1 && symbolIndex == -1) + return(new ParseAction(ParseAction::ACCEPT)); + + //Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol + if (symbolIndex == -1) + symbolIndex = 0; + + //If ourside the symbol range of this state (same as NULL), reject + if ( symbolIndex >= table[state]->size() ) + return(new ParseAction(ParseAction::REJECT)); + + ParseAction* action = (*(table[state]))[symbolIndex]; + //If null, reject. (this is a space with no other action) + if (action == NULL) + return(new ParseAction(ParseAction::REJECT)); + + //Otherwise, we have something, so return it + return (action); } NodeTree* Parser::parseInput(std::string inputString) { @@ -217,10 +282,13 @@ NodeTree* Parser::parseInput(std::string inputString) { symbolStack.push(new Symbol("INVALID", false)); while (true) { - action = actionTable(stateStack.top(), token); + std::cout << "In state: " << intToString(stateStack.top()) << std::endl; + action = getTable(stateStack.top(), token); switch (action->action) { case ParseAction::REDUCE: { + std::cout << "Reduce by " << action->reduceRule->toString() << std::endl; + int rightSideLength = action->reduceRule->getRightSide().size(); //Keep track of symbols popped for parse tree std::vector poppedSymbols; @@ -234,15 +302,18 @@ NodeTree* Parser::parseInput(std::string inputString) { Symbol* newSymbol = action->reduceRule->getLeftSide()->clone(); newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols)); symbolStack.push(newSymbol); - stateStack.push(gotoTable(stateStack.top(), symbolStack.top())); - std::cout << "Reduce by " << action->reduceRule->toString() << std::endl; + std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; + stateStack.push(getTable(stateStack.top(), symbolStack.top())->shiftState); + std::cout << "Reduced, now condition is" << std::endl; + std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; break; } case ParseAction::SHIFT: + std::cout << "Shift " << token->toString() << std::endl; + symbolStack.push(token); token = new Symbol("\""+inputReader.word()+"\"", true); stateStack.push(action->shiftState); - std::cout << "Shift " << symbolStack.top()->toString() << std::endl; break; case ParseAction::ACCEPT: std::cout << "ACCEPTED!" << std::endl; diff --git a/src/State.cpp b/src/State.cpp index c3c59f2..b9e82f5 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -42,6 +42,12 @@ std::vector* State::getTotal() { } return(&total); } +std::vector* State::getBasis() { + return &basis; +} +std::vector* State::getRemaining() { + return &remaining; +} bool State::containsRule(ParseRule* rule) { for (std::vector::size_type i = 0; i < basis.size(); i++) { @@ -55,12 +61,6 @@ bool State::containsRule(ParseRule* rule) { return false; } -std::string State::intToString(int theInt) { - std::stringstream converter; - converter << theInt; - return converter.str(); -} - std::string State::toString() { std::string concat = ""; concat += "State " + intToString(number) + ":\n"; diff --git a/src/util.cpp b/src/util.cpp new file mode 100644 index 0000000..2b69251 --- /dev/null +++ b/src/util.cpp @@ -0,0 +1,7 @@ +#include "util.h" + +std::string intToString(int theInt) { + std::stringstream converter; + converter << theInt; + return converter.str(); +} \ No newline at end of file