From 726ead0455c03cce39b57712fd9e33a71db7ae17 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 16 Jul 2013 11:15:58 -0400 Subject: [PATCH] Pull out table to its own Table class in prep for adding RNGLR algorithm. --- CMakeLists.txt | 2 +- include/Parser.h | 10 ++-- include/Table.h | 27 +++++++++ src/Parser.cpp | 142 ++++++----------------------------------------- src/Table.cpp | 115 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 165 insertions(+), 131 deletions(-) create mode 100644 include/Table.h create mode 100644 src/Table.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a644098..bb81182 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/Parser.h b/include/Parser.h index e9faf9a..1ef1000 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -9,6 +9,7 @@ #include "StringReader.h" #include "Lexer.h" #include "NodeTree.h" +#include "Table.h" #include #include @@ -25,14 +26,13 @@ class Parser { void loadGrammer(std::string grammerInputString); std::vector* firstSet(Symbol* token); std::vector* firstSet(Symbol* token, std::vector &avoidList); - void printFirstSets(); std::vector* incrementiveFollowSet(ParseRule* rule); void createStateSet(); void closure(State* state); void addStates(std::vector< State* >* stateSets, State* state); + int stateNum(State* state); std::string stateSetToString(); - void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action); - ParseAction* 
getTable(int state, Symbol* token); + NodeTree* parseInput(std::string inputString); std::string grammerToString(); @@ -53,8 +53,8 @@ class Parser { //The nullSymbol, ditto with above. Also used in comparisons Symbol* nullSymbol; - std::vector< std::vector* > table; - std::vector symbolIndexVec; + Table table; + std::stack stateStack; std::stack symbolStack; diff --git a/include/Table.h b/include/Table.h new file mode 100644 index 0000000..b4354e1 --- /dev/null +++ b/include/Table.h @@ -0,0 +1,27 @@ +#include "util.h" +#include "ParseRule.h" +#include "ParseAction.h" +#include "Symbol.h" +#include "State.h" + +#ifndef TABLE_H +#define TABLE_H + +class Table { + public: + Table(); + ~Table(); + void setSymbols(Symbol* EOFSymbol, Symbol* nullSymbol); + void add(int stateNum, Symbol* tranSymbol, ParseAction* action); + ParseAction* get(int state, Symbol* token); + std::string toString(); + private: + std::vector< std::vector* > table; + std::vector symbolIndexVec; + //The EOFSymbol, a pointer because of use in table, etc + Symbol* EOFSymbol; + //The nullSymbol, ditto with above. 
Also used in comparisons + Symbol* nullSymbol; +}; + +#endif diff --git a/src/Parser.cpp b/src/Parser.cpp index 293e0b8..406e90f 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -3,6 +3,7 @@ Parser::Parser() { EOFSymbol = new Symbol("$EOF$", true); nullSymbol = new Symbol("$NULL$", true); + table.setSymbols(EOFSymbol, nullSymbol); } Parser::~Parser() { @@ -129,17 +130,6 @@ std::vector* Parser::firstSet(Symbol* token, std::vector &avoi return(first); } -void Parser::printFirstSets() { - std::vector* first = NULL; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { - first = firstSet(symbolIndexVec[i]); - std::cout << "First set of " << symbolIndexVec[i]->toString() << " is: "; - for (std::vector::size_type j = 0; j < first->size(); j++) - std::cout << (*first)[j]->toString() << " "; - std::cout << std::endl; - } -} - void Parser::createStateSet() { std::cout << "Begining creation of stateSet" << std::endl; //First state has no parents @@ -159,6 +149,15 @@ void Parser::createStateSet() { } } +int Parser::stateNum(State* state) { + for (std::vector::size_type i = 0; i < stateSets.size(); i++) { + if (*(stateSets[i]) == *state) { + return i; + } + } + return -1; +} + //Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. 
std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { //Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end) @@ -270,13 +269,13 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state) { std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); if ((*currStateTotal)[i]->isAtEnd()) { for (std::vector::size_type j = 0; j < lookahead->size(); j++) - addToTable(state, (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); } else if (*((*currStateTotal)[i]->getAtNextIndex()) == *nullSymbol) { //If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side of length 0. (so we don't pop off stack) ParseRule* nullRule = (*currStateTotal)[i]->clone(); nullRule->setRightSide(* new std::vector()); for (std::vector::size_type j = 0; j < lookahead->size(); j++) - addToTable(state, (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule)); + table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule)); } } //Put all our new states in the set of states only if they're not already there. 
@@ -290,14 +289,14 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state) { stateAlreadyInAllStates = true; //If it does exist, we should add it as the shift/goto in the action table (*stateSets)[j]->addParents(newStates[i]->getParents()); - addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); + table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); break; } } if (!stateAlreadyInAllStates) { //If the state does not already exist, add it and add it as the shift/goto in the action table stateSets->push_back(newStates[i]); - addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1)); + table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1)); } } } @@ -310,116 +309,9 @@ std::string Parser::stateSetToString() { return concat; } -void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) { - - //If this is the first time we're adding to the table, add the EOF character - if (symbolIndexVec.size() == 0) - symbolIndexVec.push_back(EOFSymbol); - - //find what state num the from state is - int stateNum = -1; - for (std::vector::size_type i = 0; i < stateSets.size(); i++) { - if (*(stateSets[i]) == *fromState) { - stateNum = i; - break; - } - } - - //std::cout << "stateNum is " << stateNum << std::endl; - - //If state not in table, add up to and it. 
- //std::cout << "table size is " << table.size() <= table.size()) { - //std::cout << "Pushing back table" << std::endl; - table.push_back(new std::vector); - } - - //find out what index this symbol is on - int symbolIndex = -1; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { - if ( *(symbolIndexVec[i]) == *tranSymbol ) { - //Has been found - symbolIndex = i; - break; - } - } - //std::cout << "symbolIndex is " << symbolIndex << std::endl; - - //If we've never done this symbol, add it - if (symbolIndex < 0) { - // std::cout << "pushing back symbolIndexVec" <toString() << std::endl; - - //std::cout << table[stateNum] << " "; - while (symbolIndex >= table[stateNum]->size()) { - table[stateNum]->push_back(NULL); - } - - //If this table slot is empty - //std::cout << "table[stateNum] is " << table[stateNum] << std::endl; - //std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl; - - if ( (*(table[stateNum]))[symbolIndex] == NULL ) { - //std::cout << "Null, adding " << action->toString() << std::endl; - (*(table[stateNum]))[symbolIndex] = action; - } - //If the slot is not empty and does not contain ourself, then it is a conflict - else if ( !(*(table[stateNum]))[symbolIndex]->equalsExceptLookahead(*action)) { - //std::cout << "not Null!" 
<< std::endl; - std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << std::endl; - //Don't overwrite - //(*(table[stateNum]))[symbolIndex] = action; - } -} std::string Parser::tableToString() { - std::string concat = ""; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) - concat += "\t" + symbolIndexVec[i]->toString(); - concat += "\n"; - - for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) { - concat += intToString(i) + "\t"; - for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) { - if ( (*(table[i]))[j] != NULL) - concat += (*(table[i]))[j]->toString() + "\t"; - else - concat += "NULL\t"; - } - concat += "\n"; - } - return(concat); -} - -ParseAction* Parser::getTable(int state, Symbol* token) { - int symbolIndex = -1; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { - if ( *(symbolIndexVec[i]) == *token) { - symbolIndex = i; - break; - } - } - - //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec - //(This assumes singular goal assignment, a simplification for now) - if (state == 1 && symbolIndex == 0) - return(new ParseAction(ParseAction::ACCEPT)); - - //If ourside the symbol range of this state (same as NULL), reject - if ( symbolIndex >= table[state]->size() ) - return(new ParseAction(ParseAction::REJECT)); - - ParseAction* action = (*(table[state]))[symbolIndex]; - //If null, reject. 
(this is a space with no other action) - if (action == NULL) - return(new ParseAction(ParseAction::REJECT)); - - //Otherwise, we have something, so return it - return (action); + return table.toString(); } NodeTree* Parser::parseInput(std::string inputString) { @@ -432,7 +324,7 @@ NodeTree* Parser::parseInput(std::string inputString) { while (true) { std::cout << "In state: " << intToString(stateStack.top()) << std::endl; - action = getTable(stateStack.top(), token); + action = table.get(stateStack.top(), token); //std::cout << "Doing ParseAction: " << action->toString() << std::endl; switch (action->action) { case ParseAction::REDUCE: @@ -453,7 +345,7 @@ NodeTree* Parser::parseInput(std::string inputString) { newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols)); symbolStack.push(newSymbol); //std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; - stateStack.push(getTable(stateStack.top(), symbolStack.top())->shiftState); + stateStack.push(table.get(stateStack.top(), symbolStack.top())->shiftState); //std::cout << "Reduced, now condition is" << std::endl; //std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; break; diff --git a/src/Table.cpp b/src/Table.cpp new file mode 100644 index 0000000..a68a936 --- /dev/null +++ b/src/Table.cpp @@ -0,0 +1,115 @@ +#include "Table.h" + +Table::Table() { + // +} + +Table::~Table() { + // +} + +void Table::setSymbols(Symbol* EOFSymbol, Symbol* nullSymbol) { + this->EOFSymbol = EOFSymbol; + this->nullSymbol = nullSymbol; +} + +void Table::add(int stateNum, Symbol* tranSymbol, ParseAction* action) { + + //If this is the first time we're adding to the table, add the EOF character + if (symbolIndexVec.size() == 0) + symbolIndexVec.push_back(EOFSymbol); + + //If state not in table, add up to and it. 
+ //std::cout << "table size is " << table.size() <= table.size()) { + //std::cout << "Pushing back table" << std::endl; + table.push_back(new std::vector); + } + + //find out what index this symbol is on + int symbolIndex = -1; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( *(symbolIndexVec[i]) == *tranSymbol ) { + //Has been found + symbolIndex = i; + break; + } + } + //std::cout << "symbolIndex is " << symbolIndex << std::endl; + + //If we've never done this symbol, add it + if (symbolIndex < 0) { + // std::cout << "pushing back symbolIndexVec" <toString() << std::endl; + + //std::cout << table[stateNum] << " "; + while (symbolIndex >= table[stateNum]->size()) { + table[stateNum]->push_back(NULL); + } + + //If this table slot is empty + //std::cout << "table[stateNum] is " << table[stateNum] << std::endl; + //std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl; + + if ( (*(table[stateNum]))[symbolIndex] == NULL ) { + //std::cout << "Null, adding " << action->toString() << std::endl; + (*(table[stateNum]))[symbolIndex] = action; + } + //If the slot is not empty and does not contain ourself, then it is a conflict + else if ( !(*(table[stateNum]))[symbolIndex]->equalsExceptLookahead(*action)) { + //std::cout << "not Null!" 
<< std::endl; + std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << " on " << tranSymbol->toString() << std::endl; + //Don't overwrite + //(*(table[stateNum]))[symbolIndex] = action; + } +} + +ParseAction* Table::get(int state, Symbol* token) { + int symbolIndex = -1; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( *(symbolIndexVec[i]) == *token) { + symbolIndex = i; + break; + } + } + + //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec + //(This assumes singular goal assignment, a simplification for now) + if (state == 1 && symbolIndex == 0) + return(new ParseAction(ParseAction::ACCEPT)); + + //If ourside the symbol range of this state (same as NULL), reject + if ( symbolIndex >= table[state]->size() ) + return(new ParseAction(ParseAction::REJECT)); + + ParseAction* action = (*(table[state]))[symbolIndex]; + //If null, reject. (this is a space with no other action) + if (action == NULL) + return(new ParseAction(ParseAction::REJECT)); + + //Otherwise, we have something, so return it + return (action); +} + +std::string Table::toString() { + std::string concat = ""; + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) + concat += "\t" + symbolIndexVec[i]->toString(); + concat += "\n"; + + for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) { + concat += intToString(i) + "\t"; + for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) { + if ( (*(table[i]))[j] != NULL) + concat += (*(table[i]))[j]->toString() + "\t"; + else + concat += "NULL\t"; + } + concat += "\n"; + } + return(concat); +} \ No newline at end of file