Fixed lots of bugs and inefficiencies, and added a much saner and more efficient ParseAction table instead of recalculating the action every time. There are some temporary hacks in the table, mostly having to do with not having an EOF Symbol yet.

This commit is contained in:
Nathan Braswell
2013-06-04 19:50:16 -04:00
parent 0c4af245bf
commit 949dbc532a
10 changed files with 192 additions and 86 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp ) set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp )
include_directories( ${MY_INCLUDES} ) include_directories( ${MY_INCLUDES} )

View File

@@ -5,6 +5,7 @@
#define NULL 0 #define NULL 0
#endif #endif
#include "util.h"
#include "ParseRule.h" #include "ParseRule.h"
#include <vector> #include <vector>
@@ -17,6 +18,8 @@ class ParseAction {
ParseAction(ActionType action, ParseRule* reduceRule); ParseAction(ActionType action, ParseRule* reduceRule);
ParseAction(ActionType action, int shiftState); ParseAction(ActionType action, int shiftState);
~ParseAction(); ~ParseAction();
bool const operator==(const ParseAction &other);
bool const operator!=(const ParseAction &other);
std::string toString(); std::string toString();
static std::string actionToString(ActionType action); static std::string actionToString(ActionType action);

View File

@@ -5,6 +5,7 @@
#define NULL 0 #define NULL 0
#endif #endif
#include "util.h"
#include "ParseRule.h" #include "ParseRule.h"
#include "ParseAction.h" #include "ParseAction.h"
#include "Symbol.h" #include "Symbol.h"
@@ -27,14 +28,17 @@ class Parser {
void loadGrammer(std::string grammerInputString); void loadGrammer(std::string grammerInputString);
void createStateSet(); void createStateSet();
void closure(State* state); void closure(State* state);
void addState(std::vector< State* >* stateSets, State* state, Symbol*); void addStates(std::vector< State* >* stateSets, State* state);
std::string stateSetToString(); std::string stateSetToString();
int gotoTable(int state, Symbol* token); void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action);
ParseAction* actionTable(int state, Symbol* token); ParseAction* getTable(int state, Symbol* token);
NodeTree* parseInput(std::string inputString); NodeTree* parseInput(std::string inputString);
std::string grammerToString(); std::string grammerToString();
std::string grammerToDOT(); std::string grammerToDOT();
std::string tableToString();
private: private:
StringReader reader; StringReader reader;
std::map<std::string, Symbol*> symbols; std::map<std::string, Symbol*> symbols;
@@ -42,7 +46,8 @@ class Parser {
std::vector< State* > stateSets; std::vector< State* > stateSets;
//std::vector< std::vector<ParseAction*> > std::vector< std::vector<ParseAction*>* > table;
std::vector<Symbol*> symbolIndexVec;
std::stack<int> stateStack; std::stack<int> stateStack;
std::stack<Symbol*> symbolStack; std::stack<Symbol*> symbolStack;

View File

@@ -5,6 +5,7 @@
#define NULL 0 #define NULL 0
#endif #endif
#include "util.h"
#include "ParseRule.h" #include "ParseRule.h"
#include <vector> #include <vector>
@@ -16,7 +17,6 @@ class State {
public: public:
State(int number, ParseRule* basis); State(int number, ParseRule* basis);
~State(); ~State();
std::string intToString(int theInt);
bool const operator==(const State &other); bool const operator==(const State &other);
bool const operator!=(const State &other); bool const operator!=(const State &other);
std::vector<ParseRule*>* getBasis(); std::vector<ParseRule*>* getBasis();

9
include/util.h Normal file
View File

@@ -0,0 +1,9 @@
// Shared helper declarations used across the project's sources.
#ifndef UTIL_H
#define UTIL_H
#include <string>
#include <sstream>
// Returns theInt formatted as text; the definition streams the value
// through a std::stringstream.
std::string intToString(int theInt);
#endif

View File

@@ -58,6 +58,7 @@ int main(int argc, char* argv[]) {
//std::cout << "Doing stateSetToString from Main" << std::endl; //std::cout << "Doing stateSetToString from Main" << std::endl;
std::cout << parser.stateSetToString() << std::endl; std::cout << parser.stateSetToString() << std::endl;
//std::cout << "finished stateSetToString from Main" << std::endl; //std::cout << "finished stateSetToString from Main" << std::endl;
std::cout << parser.tableToString() << std::endl;
std::cout << grammerInputFileString << std::endl; std::cout << grammerInputFileString << std::endl;
std::cout << parser.grammerToString() << std::endl; std::cout << parser.grammerToString() << std::endl;

View File

@@ -22,6 +22,15 @@ ParseAction::~ParseAction() {
} }
// Two actions are equal when they have the same action type, the same shift
// state, and equivalent reduce rules. Reduce rules are equivalent when both
// pointers are identical (including both null) or when the pointed-to rules
// compare equal.
const bool ParseAction::operator==(const ParseAction &other) {
    if (action != other.action || shiftState != other.shiftState)
        return false;
    // Same pointer (or both null): nothing to dereference.
    if (reduceRule == other.reduceRule)
        return true;
    // Exactly one side has no rule: they differ, and dereferencing the null
    // side would be undefined behaviour.
    if (!reduceRule || !other.reduceRule)
        return false;
    return *reduceRule == *(other.reduceRule);
}
// Inequality is defined as the exact negation of operator==.
const bool ParseAction::operator!=(const ParseAction &other) {
    const bool same = (*this == other);
    return !same;
}
std::string ParseAction::actionToString(ActionType action) { std::string ParseAction::actionToString(ActionType action) {
switch (action) { switch (action) {
case REDUCE: case REDUCE:
@@ -40,10 +49,11 @@ std::string ParseAction::actionToString(ActionType action) {
} }
std::string ParseAction::toString() { std::string ParseAction::toString() {
std::string outputString = actionToString(action); std::string outputString = "";
if (reduceRule) outputString += actionToString(action);
outputString += " " + reduceRule->toString(); if (reduceRule != NULL)
if (shiftState) outputString += " " + reduceRule->toString();
outputString += " " + shiftState; if (shiftState != -1)
outputString += " " + intToString(shiftState);
return(outputString); return(outputString);
} }

View File

@@ -56,16 +56,10 @@ void Parser::createStateSet() {
stateSets.push_back( new State(0, loadedGrammer[0]) ); stateSets.push_back( new State(0, loadedGrammer[0]) );
//std::cout << "Begining for main set for loop" << std::endl; //std::cout << "Begining for main set for loop" << std::endl;
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) { for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
//std::cout << "calling closure on " << stateSets[i]->toString() << std::endl; //closure
closure(stateSets[i]); closure(stateSets[i]);
//std::cout << "finished closure" << std::endl; //Add the new states
//std::cout << "Starting inner for loop that adds states" << std::endl; addStates(&stateSets, stateSets[i]);
std::vector<ParseRule*>* allRules = stateSets[i]->getTotal();
for (std::vector<ParseRule*>::size_type j = 0; j < allRules->size(); j++) {
//std::cout << "about to call addState" << std::endl;
addState(&stateSets, stateSets[i], (*allRules)[j]->getAtNextIndex());
//Closure will be called in the outer loop
}
} }
} }
@@ -94,13 +88,14 @@ void Parser::closure(State* state) {
} }
//Adds state if it doesn't already exist. //Adds state if it doesn't already exist.
void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* symbol) { void Parser::addStates(std::vector< State* >* stateSets, State* state) {
std::vector< State* > newStates; std::vector< State* > newStates;
//For each rule in the state we already have //For each rule in the state we already have
for (std::vector<ParseRule*>::size_type i = 0; i < state->getTotal()->size(); i++) { std::vector<ParseRule*>* currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
//Clone the current rule //Clone the current rule
ParseRule* advancedRule = (*state->getTotal())[i]->clone(); ParseRule* advancedRule = (*currStateTotal)[i]->clone();
//Try to advance the pointer //Try to advance the pointer, if sucessful see if it is the correct next symbol
if (advancedRule->advancePointer()) { if (advancedRule->advancePointer()) {
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state //Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
@@ -112,9 +107,8 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy
if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) {
symbolAlreadyInState = true; symbolAlreadyInState = true;
//So now check to see if this exact rule is in this state //So now check to see if this exact rule is in this state
if (!newStates[j]->containsRule(advancedRule)) { if (!newStates[j]->containsRule(advancedRule))
newStates[j]->basis.push_back(advancedRule); newStates[j]->basis.push_back(advancedRule);
}
//We found a state with the same symbol, so stop searching //We found a state with the same symbol, so stop searching
break; break;
} }
@@ -124,19 +118,36 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy
newStates.push_back(newState); newStates.push_back(newState);
} }
} }
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
if ((*currStateTotal)[i]->isAtEnd()) {
std::cout << (*currStateTotal)[i]->toString() << " is at end, adding reduce to table" << std::endl;
//This should iterate through the follow set, but right now is LR(0), so all symbols
for (std::vector<Symbol*>::size_type j = 0; j < symbolIndexVec.size(); j++)
addToTable(state, symbolIndexVec[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
} else {
std::cout << (*currStateTotal)[i]->toString() << " is NOT at end" << std::endl;
}
} }
//Put all our new states in the set of states only if they're not already there. //Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false; bool stateAlreadyInAllStates = false;
Symbol* currStateSymbol;
for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) { for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
if (*(newStates[i]) == *((*stateSets)[j])) { if (*(newStates[i]) == *((*stateSets)[j])) {
stateAlreadyInAllStates = true; stateAlreadyInAllStates = true;
//std::cout << newStates[i]->toString() << " is equal to\n" << (*stateSets)[j]->toString() << std::endl; //If it does exist, we should add it as the shift/goto in the action table
addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
} }
} }
if (!stateAlreadyInAllStates) { if (!stateAlreadyInAllStates) {
stateSets->push_back(newStates[i]); stateSets->push_back(newStates[i]);
stateAlreadyInAllStates = false; stateAlreadyInAllStates = false;
//If the state does not already exist, add it and add it as the shift/goto in the action table
addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
} }
} }
} }
@@ -149,62 +160,116 @@ std::string Parser::stateSetToString() {
return concat; return concat;
} }
int Parser::gotoTable(int state, Symbol* token) { void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
std::vector<ParseRule*> allInState = *(stateSets[state]->getTotal());
ParseRule* currentRule; //find what state num the from state is
for (std::vector<ParseRule*>::size_type i = 0; i < allInState.size(); i++) { int stateNum = -1;
currentRule = allInState[i]; for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
if (*(currentRule->getAtNextIndex()) == *token) { if (*(stateSets[i]) == *fromState) {
ParseRule* advancedCurrent = currentRule->clone(); stateNum = i;
advancedCurrent->advancePointer(); break;
for (std::vector<State*>::size_type j = 0; j < stateSets.size(); j++) {
for (std::vector<ParseRule*>::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) {
if ( *(stateSets[j]->basis[k]) == *advancedCurrent)
return(j);
}
}
} }
} }
return(-1);
//std::cout << "stateNum is " << stateNum << std::endl;
//If the state is not in the table yet, add rows up to and including it.
//std::cout << "table size is " << table.size() <<std::endl;
while (stateNum >= table.size()) {
//std::cout << "Pushing back table" << std::endl;
table.push_back(new std::vector<ParseAction*>);
}
//find out what index this symbol is on
int symbolIndex = -1;
for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
if ( *(symbolIndexVec[i]) == *tranSymbol ) {
//Has been found
symbolIndex = i;
break;
}
}
//std::cout << "symbolIndex is " << symbolIndex << std::endl;
//If we've never done this symbol, add it
if (symbolIndex < 0) {
// std::cout << "pushing back symbolIndexVec" <<std::endl;
symbolIndex = symbolIndexVec.size();
symbolIndexVec.push_back(tranSymbol);
}
//std::cout << "symbolIndex is " << symbolIndex << " which is " << symbolIndexVec[symbolIndex]->toString() << std::endl;
//std::cout << table[stateNum] << " ";
while (symbolIndex >= table[stateNum]->size()) {
table[stateNum]->push_back(NULL);
}
//If this table slot is empty
//std::cout << "table[stateNum] is " << table[stateNum] << std::endl;
//std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl;
if ( (*(table[stateNum]))[symbolIndex] == NULL ) {
std::cout << "Null, adding " << action->toString() << std::endl;
(*(table[stateNum]))[symbolIndex] = action;
}
//If the slot is not empty and does not contain ourself, then it is a conflict
else if ( *((*(table[stateNum]))[symbolIndex]) != *action) {
std::cout << "not Null!" << std::endl;
std::cout << "Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << std::endl;
//Don't overwrite
//(*(table[stateNum]))[symbolIndex] = action;
}
} }
ParseAction* Parser::actionTable(int state, Symbol* token) { std::string Parser::tableToString() {
std::vector<ParseRule*>* allStateRules = stateSets[state]->getTotal(); std::string concat = "";
ParseRule* currentRule; for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++)
concat += "\t" + symbolIndexVec[i]->toString();
concat += "\n";
//Get the completed Goal rule for comparision to see if we need to accept for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) {
ParseRule* completedGoal = stateSets[0]->basis[0]->clone(); concat += intToString(i) + "\t";
while (completedGoal->advancePointer()) {} for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) {
if ( (*(table[i]))[j] != NULL)
for (std::vector<ParseRule*>::size_type i = 0; i < allStateRules->size(); i++) { concat += (*(table[i]))[j]->toString() + "\t";
currentRule = (*allStateRules)[i]; else
concat += "NULL\t";
//If the current rule in the state is completed, then do a reduce action
if (currentRule->isAtEnd()) {
//But first, if our advanced rule is equal to the completedGoal, we accept
if (*currentRule == *completedGoal)
return new ParseAction(ParseAction::ACCEPT);
return new ParseAction(ParseAction::REDUCE, currentRule);
} }
concat += "\n";
//If the current rule in the state is not completed, see if it has the next correct token
//std::cout << currentRule->getAtNextIndex()->toString() << " comp to " << token->toString() << std::endl;
if ( *(currentRule->getAtNextIndex()) == *token){
//If it does have the correct next token, then find the state that has this rule advanced as basis, that is the state we shift to
//Goes to n^2 here, really need that table
ParseRule* advancedCurrent = currentRule->clone();
advancedCurrent->advancePointer();
for (std::vector<State*>::size_type j = 0; j < stateSets.size(); j++) {
for (std::vector<ParseRule*>::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) {
if ( *(stateSets[j]->basis[k]) == *advancedCurrent)
return new ParseAction(ParseAction::SHIFT, j);
}
}
}
} }
return new ParseAction(ParseAction::REJECT); return(concat);
}
ParseAction* Parser::getTable(int state, Symbol* token) {
int symbolIndex = -1;
for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
if ( *(symbolIndexVec[i]) == *token) {
symbolIndex = i;
break;
}
}
//This is the accepting case: state 1's reduction on EOF, which is not in the symbolIndexVec
//(This assumes singular goal assignment, a simplification for now)
if (state == 1 && symbolIndex == -1)
return(new ParseAction(ParseAction::ACCEPT));
//Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol
if (symbolIndex == -1)
symbolIndex = 0;
//If outside the symbol range of this state (same as NULL), reject
if ( symbolIndex >= table[state]->size() )
return(new ParseAction(ParseAction::REJECT));
ParseAction* action = (*(table[state]))[symbolIndex];
//If null, reject. (this is a space with no other action)
if (action == NULL)
return(new ParseAction(ParseAction::REJECT));
//Otherwise, we have something, so return it
return (action);
} }
NodeTree* Parser::parseInput(std::string inputString) { NodeTree* Parser::parseInput(std::string inputString) {
@@ -217,10 +282,13 @@ NodeTree* Parser::parseInput(std::string inputString) {
symbolStack.push(new Symbol("INVALID", false)); symbolStack.push(new Symbol("INVALID", false));
while (true) { while (true) {
action = actionTable(stateStack.top(), token); std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
action = getTable(stateStack.top(), token);
switch (action->action) { switch (action->action) {
case ParseAction::REDUCE: case ParseAction::REDUCE:
{ {
std::cout << "Reduce by " << action->reduceRule->toString() << std::endl;
int rightSideLength = action->reduceRule->getRightSide().size(); int rightSideLength = action->reduceRule->getRightSide().size();
//Keep track of symbols popped for parse tree //Keep track of symbols popped for parse tree
std::vector<Symbol*> poppedSymbols; std::vector<Symbol*> poppedSymbols;
@@ -234,15 +302,18 @@ NodeTree* Parser::parseInput(std::string inputString) {
Symbol* newSymbol = action->reduceRule->getLeftSide()->clone(); Symbol* newSymbol = action->reduceRule->getLeftSide()->clone();
newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols)); newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols));
symbolStack.push(newSymbol); symbolStack.push(newSymbol);
stateStack.push(gotoTable(stateStack.top(), symbolStack.top())); std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;
std::cout << "Reduce by " << action->reduceRule->toString() << std::endl; stateStack.push(getTable(stateStack.top(), symbolStack.top())->shiftState);
std::cout << "Reduced, now condition is" << std::endl;
std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;
break; break;
} }
case ParseAction::SHIFT: case ParseAction::SHIFT:
std::cout << "Shift " << token->toString() << std::endl;
symbolStack.push(token); symbolStack.push(token);
token = new Symbol("\""+inputReader.word()+"\"", true); token = new Symbol("\""+inputReader.word()+"\"", true);
stateStack.push(action->shiftState); stateStack.push(action->shiftState);
std::cout << "Shift " << symbolStack.top()->toString() << std::endl;
break; break;
case ParseAction::ACCEPT: case ParseAction::ACCEPT:
std::cout << "ACCEPTED!" << std::endl; std::cout << "ACCEPTED!" << std::endl;

View File

@@ -42,6 +42,12 @@ std::vector<ParseRule*>* State::getTotal() {
} }
return(&total); return(&total);
} }
// Accessor for the basis rule list: returns the address of the member
// vector itself, not a copy.
std::vector<ParseRule*>* State::getBasis() {
    std::vector<ParseRule*>* basisRules = &basis;
    return basisRules;
}
// Accessor for the remaining rule list: returns the address of the member
// vector itself, not a copy.
std::vector<ParseRule*>* State::getRemaining() {
    std::vector<ParseRule*>* remainingRules = &remaining;
    return remainingRules;
}
bool State::containsRule(ParseRule* rule) { bool State::containsRule(ParseRule* rule) {
for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) { for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) {
@@ -55,12 +61,6 @@ bool State::containsRule(ParseRule* rule) {
return false; return false;
} }
std::string State::intToString(int theInt) {
std::stringstream converter;
converter << theInt;
return converter.str();
}
std::string State::toString() { std::string State::toString() {
std::string concat = ""; std::string concat = "";
concat += "State " + intToString(number) + ":\n"; concat += "State " + intToString(number) + ":\n";

7
src/util.cpp Normal file
View File

@@ -0,0 +1,7 @@
#include "util.h"
// Formats an integer as text by writing it into an output string stream
// and returning the accumulated characters.
std::string intToString(int theInt) {
    std::ostringstream out;
    out << theInt;
    return out.str();
}