Fixed lots of bugs and inefficiencies, and added a much saner and more efficient ParseAction table instead of recalculating every time. Some temporary hacks remain in the table, mostly having to do with not having an EOF Symbol yet.

This commit is contained in:
Nathan Braswell
2013-06-04 19:50:16 -04:00
parent 0c4af245bf
commit 949dbc532a
10 changed files with 192 additions and 86 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp )
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp )
include_directories( ${MY_INCLUDES} )

View File

@@ -5,6 +5,7 @@
#define NULL 0
#endif
#include "util.h"
#include "ParseRule.h"
#include <vector>
@@ -17,6 +18,8 @@ class ParseAction {
ParseAction(ActionType action, ParseRule* reduceRule);
ParseAction(ActionType action, int shiftState);
~ParseAction();
bool const operator==(const ParseAction &other);
bool const operator!=(const ParseAction &other);
std::string toString();
static std::string actionToString(ActionType action);

View File

@@ -5,6 +5,7 @@
#define NULL 0
#endif
#include "util.h"
#include "ParseRule.h"
#include "ParseAction.h"
#include "Symbol.h"
@@ -27,14 +28,17 @@ class Parser {
void loadGrammer(std::string grammerInputString);
void createStateSet();
void closure(State* state);
void addState(std::vector< State* >* stateSets, State* state, Symbol*);
void addStates(std::vector< State* >* stateSets, State* state);
std::string stateSetToString();
int gotoTable(int state, Symbol* token);
ParseAction* actionTable(int state, Symbol* token);
void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action);
ParseAction* getTable(int state, Symbol* token);
NodeTree* parseInput(std::string inputString);
std::string grammerToString();
std::string grammerToDOT();
std::string tableToString();
private:
StringReader reader;
std::map<std::string, Symbol*> symbols;
@@ -42,7 +46,8 @@ class Parser {
std::vector< State* > stateSets;
//std::vector< std::vector<ParseAction*> >
std::vector< std::vector<ParseAction*>* > table;
std::vector<Symbol*> symbolIndexVec;
std::stack<int> stateStack;
std::stack<Symbol*> symbolStack;

View File

@@ -5,6 +5,7 @@
#define NULL 0
#endif
#include "util.h"
#include "ParseRule.h"
#include <vector>
@@ -16,7 +17,6 @@ class State {
public:
State(int number, ParseRule* basis);
~State();
std::string intToString(int theInt);
bool const operator==(const State &other);
bool const operator!=(const State &other);
std::vector<ParseRule*>* getBasis();

9
include/util.h Normal file
View File

@@ -0,0 +1,9 @@
#ifndef UTIL_H
#define UTIL_H
#include <string>
#include <sstream>
// Returns the text produced by streaming theInt into a std::stringstream
// (e.g. 42 becomes "42"). Shared helper, defined in src/util.cpp.
std::string intToString(int theInt);
#endif

View File

@@ -58,6 +58,7 @@ int main(int argc, char* argv[]) {
//std::cout << "Doing stateSetToString from Main" << std::endl;
std::cout << parser.stateSetToString() << std::endl;
//std::cout << "finished stateSetToString from Main" << std::endl;
std::cout << parser.tableToString() << std::endl;
std::cout << grammerInputFileString << std::endl;
std::cout << parser.grammerToString() << std::endl;

View File

@@ -22,6 +22,15 @@ ParseAction::~ParseAction() {
}
// Compares two parse actions for equality.
//
// Two actions are equal when they have the same action type, the same shift
// state, and reduce rules that are either the very same object or compare
// equal by value. A missing (NULL) reduce rule only matches another missing
// reduce rule.
const bool ParseAction::operator==(const ParseAction &other) {
    if (action != other.action || shiftState != other.shiftState)
        return false;
    //Same pointer (this includes both being NULL) means same rule
    if (reduceRule == other.reduceRule)
        return true;
    //Exactly one side has no rule: they differ, and NULL must not be dereferenced
    if (reduceRule == NULL || other.reduceRule == NULL)
        return false;
    return *reduceRule == *(other.reduceRule);
}
// Inequality is the exact negation of operator==.
const bool ParseAction::operator!=(const ParseAction &other) {
    const bool same = (*this == other);
    return !same;
}
std::string ParseAction::actionToString(ActionType action) {
switch (action) {
case REDUCE:
@@ -40,10 +49,11 @@ std::string ParseAction::actionToString(ActionType action) {
}
std::string ParseAction::toString() {
std::string outputString = actionToString(action);
if (reduceRule)
outputString += " " + reduceRule->toString();
if (shiftState)
outputString += " " + shiftState;
std::string outputString = "";
outputString += actionToString(action);
if (reduceRule != NULL)
outputString += " " + reduceRule->toString();
if (shiftState != -1)
outputString += " " + intToString(shiftState);
return(outputString);
}

View File

@@ -56,16 +56,10 @@ void Parser::createStateSet() {
stateSets.push_back( new State(0, loadedGrammer[0]) );
//std::cout << "Begining for main set for loop" << std::endl;
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
//std::cout << "calling closure on " << stateSets[i]->toString() << std::endl;
//closure
closure(stateSets[i]);
//std::cout << "finished closure" << std::endl;
//std::cout << "Starting inner for loop that adds states" << std::endl;
std::vector<ParseRule*>* allRules = stateSets[i]->getTotal();
for (std::vector<ParseRule*>::size_type j = 0; j < allRules->size(); j++) {
//std::cout << "about to call addState" << std::endl;
addState(&stateSets, stateSets[i], (*allRules)[j]->getAtNextIndex());
//Closure will be called in the outer loop
}
//Add the new states
addStates(&stateSets, stateSets[i]);
}
}
@@ -94,13 +88,14 @@ void Parser::closure(State* state) {
}
//Adds state if it doesn't already exist.
void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* symbol) {
void Parser::addStates(std::vector< State* >* stateSets, State* state) {
std::vector< State* > newStates;
//For each rule in the state we already have
for (std::vector<ParseRule*>::size_type i = 0; i < state->getTotal()->size(); i++) {
std::vector<ParseRule*>* currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
//Clone the current rule
ParseRule* advancedRule = (*state->getTotal())[i]->clone();
//Try to advance the pointer
ParseRule* advancedRule = (*currStateTotal)[i]->clone();
//Try to advance the pointer, if sucessful see if it is the correct next symbol
if (advancedRule->advancePointer()) {
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
@@ -112,9 +107,8 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy
if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) {
symbolAlreadyInState = true;
//So now check to see if this exact rule is in this state
if (!newStates[j]->containsRule(advancedRule)) {
if (!newStates[j]->containsRule(advancedRule))
newStates[j]->basis.push_back(advancedRule);
}
//We found a state with the same symbol, so stop searching
break;
}
@@ -124,19 +118,36 @@ void Parser::addState(std::vector< State* >* stateSets, State* state, Symbol* sy
newStates.push_back(newState);
}
}
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
if ((*currStateTotal)[i]->isAtEnd()) {
std::cout << (*currStateTotal)[i]->toString() << " is at end, adding reduce to table" << std::endl;
//This should iterate through the follow set, but right now is LR(0), so all symbols
for (std::vector<Symbol*>::size_type j = 0; j < symbolIndexVec.size(); j++)
addToTable(state, symbolIndexVec[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
} else {
std::cout << (*currStateTotal)[i]->toString() << " is NOT at end" << std::endl;
}
}
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false;
Symbol* currStateSymbol;
for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
if (*(newStates[i]) == *((*stateSets)[j])) {
stateAlreadyInAllStates = true;
//std::cout << newStates[i]->toString() << " is equal to\n" << (*stateSets)[j]->toString() << std::endl;
//If it does exist, we should add it as the shift/goto in the action table
addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
}
}
if (!stateAlreadyInAllStates) {
stateSets->push_back(newStates[i]);
stateAlreadyInAllStates = false;
//If the state does not already exist, add it and add it as the shift/goto in the action table
addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
}
}
}
@@ -149,62 +160,116 @@ std::string Parser::stateSetToString() {
return concat;
}
int Parser::gotoTable(int state, Symbol* token) {
std::vector<ParseRule*> allInState = *(stateSets[state]->getTotal());
ParseRule* currentRule;
for (std::vector<ParseRule*>::size_type i = 0; i < allInState.size(); i++) {
currentRule = allInState[i];
if (*(currentRule->getAtNextIndex()) == *token) {
ParseRule* advancedCurrent = currentRule->clone();
advancedCurrent->advancePointer();
for (std::vector<State*>::size_type j = 0; j < stateSets.size(); j++) {
for (std::vector<ParseRule*>::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) {
if ( *(stateSets[j]->basis[k]) == *advancedCurrent)
return(j);
}
}
// Records `action` in the parse table cell for (fromState, tranSymbol).
//
// fromState  - state the transition starts from. Its row number is its
//              position in stateSets, found by value comparison.
// tranSymbol - symbol labelling the column. It is appended to symbolIndexVec
//              the first time it is seen.
// action     - action to store. It is stored only if the cell is empty; an
//              existing entry is never overwritten, and a conflict is
//              reported when the existing entry differs from `action`.
//
// Rows and columns are grown on demand; unused cells are filled with NULL.
// If fromState is not in stateSets, nothing is stored and a message is printed.
// NOTE(review): when `action` is not stored (unknown state, duplicate or
// conflict) it is not freed here - confirm who owns it before adding a delete.
void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
    //Find what state num the from state is; stateSets.size() means "not found"
    std::vector<State*>::size_type stateNum = stateSets.size();
    for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
        if (*(stateSets[i]) == *fromState) {
            stateNum = i;
            break;
        }
    }
    //An unknown state has no row. Previously this case left the state number
    //at -1, which compared as a huge unsigned value against table.size(),
    //grew the table without bound and then indexed table[-1].
    if (stateNum == stateSets.size()) {
        std::cout << "Cannot add " << action->toString() << " to table: from state is not in the state set" << std::endl;
        return;
    }

    //If state not in table, add rows up to and including it
    while (table.size() <= stateNum)
        table.push_back(new std::vector<ParseAction*>);
    std::vector<ParseAction*>* row = table[stateNum];

    //Find out what column this symbol is on; symbolIndexVec.size() means "not found"
    std::vector<Symbol*>::size_type symbolIndex = symbolIndexVec.size();
    for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
        if ( *(symbolIndexVec[i]) == *tranSymbol ) {
            symbolIndex = i;
            break;
        }
    }
    //If we've never done this symbol, give it the next free column
    if (symbolIndex == symbolIndexVec.size())
        symbolIndexVec.push_back(tranSymbol);

    //Pad the row with empty cells up to and including this column
    while (row->size() <= symbolIndex)
        row->push_back(NULL);

    ParseAction* existing = (*row)[symbolIndex];
    if (existing == NULL) {
        //The slot is empty, so claim it
        std::cout << "Null, adding " << action->toString() << std::endl;
        (*row)[symbolIndex] = action;
    } else if (*existing != *action) {
        //The slot is not empty and does not contain an equal action: conflict
        std::cout << "not Null!" << std::endl;
        std::cout << "Conflict between old: " << existing->toString() << " and new: " << action->toString() << std::endl;
        //Don't overwrite; the first action stored wins
    }
}
ParseAction* Parser::actionTable(int state, Symbol* token) {
std::vector<ParseRule*>* allStateRules = stateSets[state]->getTotal();
ParseRule* currentRule;
std::string Parser::tableToString() {
std::string concat = "";
for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++)
concat += "\t" + symbolIndexVec[i]->toString();
concat += "\n";
//Get the completed Goal rule for comparision to see if we need to accept
ParseRule* completedGoal = stateSets[0]->basis[0]->clone();
while (completedGoal->advancePointer()) {}
for (std::vector<ParseRule*>::size_type i = 0; i < allStateRules->size(); i++) {
currentRule = (*allStateRules)[i];
//If the current rule in the state is completed, then do a reduce action
if (currentRule->isAtEnd()) {
//But first, if our advanced rule is equal to the completedGoal, we accept
if (*currentRule == *completedGoal)
return new ParseAction(ParseAction::ACCEPT);
return new ParseAction(ParseAction::REDUCE, currentRule);
for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) {
concat += intToString(i) + "\t";
for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) {
if ( (*(table[i]))[j] != NULL)
concat += (*(table[i]))[j]->toString() + "\t";
else
concat += "NULL\t";
}
//If the current rule in the state is not completed, see if it has the next correct token
//std::cout << currentRule->getAtNextIndex()->toString() << " comp to " << token->toString() << std::endl;
if ( *(currentRule->getAtNextIndex()) == *token){
//If it does have the correct next token, then find the state that has this rule advanced as basis, that is the state we shift to
//Goes to n^2 here, really need that table
ParseRule* advancedCurrent = currentRule->clone();
advancedCurrent->advancePointer();
for (std::vector<State*>::size_type j = 0; j < stateSets.size(); j++) {
for (std::vector<ParseRule*>::size_type k = 0; k < stateSets[j]->basis.size(); k++ ) {
if ( *(stateSets[j]->basis[k]) == *advancedCurrent)
return new ParseAction(ParseAction::SHIFT, j);
}
}
}
concat += "\n";
}
return new ParseAction(ParseAction::REJECT);
return(concat);
}
// Looks up the parse action for `token` while in `state`.
//
// state - row of the table to consult.
// token - symbol to look up; its column is its position in symbolIndexVec.
//
// Returns the stored table entry when there is one. Otherwise returns a
// freshly allocated ACCEPT or REJECT action, so the result may or may not
// point into the table.
//
// A `state` with no row in the table (negative or past the end) is rejected
// instead of being used as an index. The reduce path of the parser pushes
// the shiftState of a lookup result without checking it, so an invalid
// state can arrive here.
ParseAction* Parser::getTable(int state, Symbol* token) {
    //Find the column of this token; -1 means the token has never been seen
    int symbolIndex = -1;
    for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
        if ( *(symbolIndexVec[i]) == *token) {
            symbolIndex = i;
            break;
        }
    }

    //This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec
    //(This assumes singular goal assignment, a simplification for now)
    if (state == 1 && symbolIndex == -1)
        return(new ParseAction(ParseAction::ACCEPT));

    //No row for this state: reject rather than read outside the table
    if (state < 0 || static_cast<std::vector< std::vector<ParseAction*>* >::size_type>(state) >= table.size())
        return(new ParseAction(ParseAction::REJECT));

    //Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol
    if (symbolIndex == -1)
        symbolIndex = 0;

    //If outside the symbol range of this state (same as NULL), reject
    std::vector<ParseAction*>* row = table[state];
    if ( static_cast<std::vector<ParseAction*>::size_type>(symbolIndex) >= row->size() )
        return(new ParseAction(ParseAction::REJECT));

    ParseAction* action = (*row)[symbolIndex];
    //If null, reject. (this is a space with no other action)
    if (action == NULL)
        return(new ParseAction(ParseAction::REJECT));

    //Otherwise, we have something, so return it
    return (action);
}
NodeTree* Parser::parseInput(std::string inputString) {
@@ -217,10 +282,13 @@ NodeTree* Parser::parseInput(std::string inputString) {
symbolStack.push(new Symbol("INVALID", false));
while (true) {
action = actionTable(stateStack.top(), token);
std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
action = getTable(stateStack.top(), token);
switch (action->action) {
case ParseAction::REDUCE:
{
std::cout << "Reduce by " << action->reduceRule->toString() << std::endl;
int rightSideLength = action->reduceRule->getRightSide().size();
//Keep track of symbols popped for parse tree
std::vector<Symbol*> poppedSymbols;
@@ -234,15 +302,18 @@ NodeTree* Parser::parseInput(std::string inputString) {
Symbol* newSymbol = action->reduceRule->getLeftSide()->clone();
newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols));
symbolStack.push(newSymbol);
stateStack.push(gotoTable(stateStack.top(), symbolStack.top()));
std::cout << "Reduce by " << action->reduceRule->toString() << std::endl;
std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;
stateStack.push(getTable(stateStack.top(), symbolStack.top())->shiftState);
std::cout << "Reduced, now condition is" << std::endl;
std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;
break;
}
case ParseAction::SHIFT:
std::cout << "Shift " << token->toString() << std::endl;
symbolStack.push(token);
token = new Symbol("\""+inputReader.word()+"\"", true);
stateStack.push(action->shiftState);
std::cout << "Shift " << symbolStack.top()->toString() << std::endl;
break;
case ParseAction::ACCEPT:
std::cout << "ACCEPTED!" << std::endl;

View File

@@ -42,6 +42,12 @@ std::vector<ParseRule*>* State::getTotal() {
}
return(&total);
}
// Gives access to this state's basis rules. The returned pointer refers to
// the member vector itself, not a copy.
std::vector<ParseRule*>* State::getBasis() {
    std::vector<ParseRule*>* basisRules = &basis;
    return basisRules;
}
// Gives access to this state's `remaining` rules. The returned pointer refers
// to the member vector itself, not a copy.
std::vector<ParseRule*>* State::getRemaining() {
    std::vector<ParseRule*>* remainingRules = &remaining;
    return remainingRules;
}
bool State::containsRule(ParseRule* rule) {
for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) {
@@ -55,12 +61,6 @@ bool State::containsRule(ParseRule* rule) {
return false;
}
std::string State::intToString(int theInt) {
std::stringstream converter;
converter << theInt;
return converter.str();
}
std::string State::toString() {
std::string concat = "";
concat += "State " + intToString(number) + ":\n";

7
src/util.cpp Normal file
View File

@@ -0,0 +1,7 @@
#include "util.h"
// Renders an integer as text by streaming it into a string buffer
// (e.g. -42 becomes "-42").
std::string intToString(int theInt) {
    std::ostringstream buffer;
    buffer << theInt;
    std::string text = buffer.str();
    return text;
}