From b9ffe33d0b39cdf20cc3719b175b686cc675ca4e Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Wed, 2 Oct 2013 03:15:20 -0400 Subject: [PATCH] Made Symbol always stack, not heap, allocated. Finally fixed bugs with ASTTransformation. --- CMakeLists.txt | 2 +- include/ASTData.h | 25 ++++--- include/ASTTransformation.h | 8 +-- include/CollapseTransformation.h | 53 ++++++++++++++ include/GraphStructuredStack.h | 6 +- include/LALRParser.h | 2 +- include/Lexer.h | 2 +- include/NodeTree.h | 19 +++-- include/ParseRule.h | 28 ++++---- include/Parser.h | 20 +++--- include/RNGLRParser.h | 36 +++++----- include/RemovalTransformation.h | 6 +- include/Symbol.h | 24 +++---- include/Table.h | 16 ++--- main.cpp | 49 +++++++++++-- src/ASTData.cpp | 7 +- src/ASTTransformation.cpp | 2 +- src/GraphStructuredStack.cpp | 4 +- src/LALRParser.cpp | 22 +++--- src/Lexer.cpp | 8 +-- src/ParseRule.cpp | 43 ++++++------ src/Parser.cpp | 94 ++++++++++++------------- src/RNGLRParser.cpp | 116 +++++++++++++++---------------- src/Symbol.cpp | 29 +++++--- src/Table.cpp | 32 ++++----- 25 files changed, 375 insertions(+), 278 deletions(-) create mode 100644 include/CollapseTransformation.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ea7251e..4b933c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ASTData.h b/include/ASTData.h index 079b408..9bf1c25 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -1,27 +1,30 @@ #ifndef ASTDATA_H #define ASTDATA_H +#include +#include "Symbol.h" + #ifndef NULL #define NULL 0 #endif -#include "Symbol.h" +enum ASTType {translation_unit, interpreter_directive, identifier, + import, function, code_block, + typed_parameter, expression, boolean_expression, statement, + if_statement, return_statement, assignment_statement, function_call, + value}; +enum ValueType {none, boolean, integer, floating, double_percision, char_string }; -#include class ASTData { public: - enum ASTType {translation_unit, interpreter_directive, identifier, - import, interpreter_directive, function, code_block, - typed_parameter, expression, boolean_expression, statement, - if_statement, return_statement, assignment_statement, function_call, - value}; - enum ValueType {none, boolean, integer, floating, double_percision, char_string } - ASTData(ASTType type, ValueType valueType = none); - ASTData(ASTType type, Symbol* symbol, ValueType valueType = none); + ASTData(ASTType type, Symbol symbol, ValueType valueType = none); + ~ASTData(); + std::string toString(); ASTType type; - Symbol* symbol; + ValueType valueType; + Symbol symbol; private: }; diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index f2b3508..e41483a 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -1,16 +1,16 @@ #ifndef ASTTRANSFORMATION_H #define ASTTRANSFORMATION_H +#include "ASTData.h" #include "NodeTransformation.h" -class ASTTransformation: public Transformation { +class ASTTransformation: public NodeTransformation { public: ASTTransformation(); ~ASTTransformation(); - virtual NodeTree* transform(NodeTree* from); + virtual NodeTree* transform(NodeTree* from); private: //Nothing }; - -#endif \ No newline at end of file +#endif diff --git a/include/CollapseTransformation.h b/include/CollapseTransformation.h new file mode 100644 index 0000000..7863a79 --- /dev/null +++ b/include/CollapseTransformation.h @@ -0,0 +1,53 @@ +#ifndef COLLAPSETRANSFORMATION_H +#define COLLAPSETRANSFORMATION_H + +#include +#include + +#include "NodeTransformation.h" + +template +class CollapseTransformation: public NodeTransformation { + public: + CollapseTransformation(T toCollapse); + ~CollapseTransformation(); + virtual NodeTree* transform(NodeTree* from); + + private: + T toCollapse; +}; + +#endif + +template +CollapseTransformation::CollapseTransformation(T toCollapse) { + this->toCollapse = toCollapse; +} + +template +CollapseTransformation::~CollapseTransformation() { + // +} + +template +NodeTree* CollapseTransformation::transform(NodeTree* from) { + std::queue*> toProcess; + toProcess.push(from); + while(!toProcess.empty()) { + NodeTree* node = toProcess.front(); + toProcess.pop(); + std::vector*> children = node->getChildren(); + for (int i = 0; i < children.size(); i++) { + if (children[i]->getData() == toCollapse) { + node->removeChild(children[i]); + std::vector*> newChildren = children[i]->getChildren(); + node->addChildren(newChildren); + for (int j = 0; j < newChildren.size(); j++) + toProcess.push(newChildren[j]); + } + else + toProcess.push(children[i]); + } + } + return from; +} diff --git a/include/GraphStructuredStack.h b/include/GraphStructuredStack.h index 7f10039..302fd60 100644 --- a/include/GraphStructuredStack.h +++ b/include/GraphStructuredStack.h @@ -23,13 +23,13 @@ class GraphStructuredStack { std::vector*> >* getReachablePaths(NodeTree* start, int lenght); void recursivePathFind(NodeTree* start, int length, std::vector*> currentPath, std::vector*> >* paths); bool hasEdge(NodeTree* start, NodeTree* end); - NodeTree* getEdge(NodeTree* start, NodeTree* end); - void addEdge(NodeTree* start, NodeTree* end, NodeTree* edge); + NodeTree* getEdge(NodeTree* start, NodeTree* end); + void addEdge(NodeTree* start, NodeTree* end, NodeTree* edge); std::string toString(); private: std::vector*>*> gss; - std::map< std::pair< NodeTree*, NodeTree* >, NodeTree* > edges; + std::map< std::pair< NodeTree*, NodeTree* >, NodeTree* > edges; }; #endif diff --git a/include/LALRParser.h b/include/LALRParser.h index e8c35fc..6f36034 100644 --- a/include/LALRParser.h +++ b/include/LALRParser.h @@ -12,7 +12,7 @@ class LALRParser: public Parser { //Defaults in parser are mostly LALR, so we only need to //implement the actual parsing function - NodeTree* parseInput(std::string inputString); + NodeTree* parseInput(std::string inputString); private: //Nothing diff --git a/include/Lexer.h b/include/Lexer.h index d6f6946..8e87d84 100644 --- a/include/Lexer.h +++ b/include/Lexer.h @@ -15,7 +15,7 @@ class Lexer { ~Lexer(); void addRegEx(std::string regExString); void setInput(std::string inputString); - Symbol* next(); + Symbol next(); private: std::vector regExs; std::string input; diff --git a/include/NodeTree.h b/include/NodeTree.h index 56cfd02..fdb56e0 100644 --- a/include/NodeTree.h +++ b/include/NodeTree.h @@ -28,6 +28,7 @@ class NodeTree { void addChild(NodeTree* child); void addChildren(std::vector*>* children); + void addChildren(std::vector*> children); int findChild(NodeTree* child); void removeChild(NodeTree* child); void removeChild(int index); @@ -63,8 +64,6 @@ int NodeTree::idCounter; template NodeTree::NodeTree() { name = "UnnamedNode"; - data = NULL; - id = idCounter++; } @@ -93,6 +92,7 @@ const bool NodeTree::operator==(NodeTree &other) { return true; } +//Used when making a map of NodeTrees template const bool NodeTree::operator<(const NodeTree &other) const { return data < other.getData(); @@ -136,6 +136,12 @@ void NodeTree::addChildren(std::vector*>* children) { addChild((*children)[i]); } +template +void NodeTree::addChildren(std::vector*> children) { + for (typename std::vector*>::size_type i = 0; i < children.size(); i++) + addChild(children[i]); +} + template int NodeTree::findChild(NodeTree* child) { for (int i = 0; i < children.size(); i++) { @@ -231,10 +237,11 @@ std::string NodeTree::DOTGraphStringHelper(std::vector*> avoidLis template std::string NodeTree::getDOTName() { std::string DOTName = ""; - if (data != NULL) - DOTName = "\"" + replaceExEscape(name + "-" + data->toString(), "\"", "\\\"") + "_" + intToString(id) + "\""; //Note that terminals already have a quote in the front of their name, so we don't need to add one - else - DOTName = "\"" + replaceExEscape(name, "\"", " \\\"") + "_" + intToString(id) + "\""; + DOTName = "\"" + replaceExEscape(name + "-" + data.toString(), "\"", "\\\"") + "_" + intToString(id) + "\""; //Note that terminals already have a quote in the front of their name, so we don't need to add one + // if (data != NULL) + // DOTName = "\"" + replaceExEscape(name + "-" + data->toString(), "\"", "\\\"") + "_" + intToString(id) + "\""; //Note that terminals already have a quote in the front of their name, so we don't need to add one + // else + // DOTName = "\"" + replaceExEscape(name, "\"", " \\\"") + "_" + intToString(id) + "\""; return(replaceExEscape(DOTName, "\n", "\\n")); } diff --git a/include/ParseRule.h b/include/ParseRule.h index aac85a7..fc56bea 100644 --- a/include/ParseRule.h +++ b/include/ParseRule.h @@ -14,7 +14,7 @@ class ParseRule { public: ParseRule(); - ParseRule(Symbol* leftHandle, int pointerIndex, std::vector &rightSide, std::vector* lookahead); + ParseRule(Symbol leftHandle, int pointerIndex, std::vector &rightSide, std::vector* lookahead); ~ParseRule(); const bool equalsExceptLookahead(const ParseRule &other); bool const operator==(const ParseRule &other); @@ -22,32 +22,32 @@ class ParseRule { ParseRule* clone(); - void setLeftHandle(Symbol* leftHandle); - void appendToRight(Symbol* appendee); + void setLeftHandle(Symbol leftHandle); + void appendToRight(Symbol appendee); - Symbol* getLeftSide(); - void setRightSide(std::vector rightSide); - std::vector getRightSide(); - Symbol* getAtNextIndex(); - Symbol* getAtIndex(); + Symbol getLeftSide(); + void setRightSide(std::vector rightSide); + std::vector getRightSide(); + Symbol getAtNextIndex(); + Symbol getAtIndex(); int getRightSize(); int getIndex(); bool advancePointer(); bool isAtEnd(); - void setLookahead(std::vector* lookahead); - void addLookahead(std::vector* lookahead); - std::vector* getLookahead(); + void setLookahead(std::vector* lookahead); + void addLookahead(std::vector* lookahead); + std::vector* getLookahead(); std::string toString(); std::string toDOT(); private: int pointerIndex; - Symbol* leftHandle; - std::vector* lookahead; - std::vector rightSide; + Symbol leftHandle; + std::vector* lookahead; + std::vector rightSide; }; diff --git a/include/Parser.h b/include/Parser.h index fa3773f..fc43972 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -27,16 +27,16 @@ class Parser { virtual void loadGrammer(std::string grammerInputString); virtual void createStateSet(); virtual std::string stateSetToString(); - virtual NodeTree* parseInput(std::string inputString) = 0; + virtual NodeTree* parseInput(std::string inputString) = 0; virtual std::string grammerToString(); virtual std::string grammerToDOT(); std::string tableToString(); protected: - std::vector* firstSet(Symbol* token); - std::vector* firstSet(Symbol* token, std::vector avoidList); - std::vector* incrementiveFollowSet(ParseRule* rule); + std::vector* firstSet(Symbol token); + std::vector* firstSet(Symbol token, std::vector avoidList); + std::vector* incrementiveFollowSet(ParseRule* rule); virtual void closure(State* state); virtual void addStates(std::vector< State* >* stateSets, State* state, std::queue* toDo); int stateNum(State* state); @@ -44,24 +44,24 @@ class Parser { StringReader reader; Lexer lexer; - std::map, Symbol*> symbols; + std::map, Symbol> symbols; std::vector loadedGrammer; std::vector< State* > stateSets; //The EOFSymbol, a pointer because of use in table, etc - Symbol* EOFSymbol; + Symbol EOFSymbol; //The nullSymbol, ditto with above. Also used in comparisons - Symbol* nullSymbol; + Symbol nullSymbol; Table table; std::stack stateStack; - std::stack symbolStack; + std::stack symbolStack; - Symbol* getOrAddSymbol(std::string symbolString, bool isTerminal); - NodeTree* reduceTreeCombine(Symbol* newSymbol, std::vector &symbols); + Symbol getOrAddSymbol(std::string symbolString, bool isTerminal); + NodeTree* reduceTreeCombine(Symbol newSymbol, std::vector &symbols); }; #endif \ No newline at end of file diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h index 35ef95a..3e921a3 100644 --- a/include/RNGLRParser.h +++ b/include/RNGLRParser.h @@ -15,47 +15,47 @@ class RNGLRParser: public Parser { public: RNGLRParser(); ~RNGLRParser(); - NodeTree* parseInput(std::string inputString); + NodeTree* parseInput(std::string inputString); private: void reducer(int i); void shifter(int i); - void addChildren(NodeTree* parent, std::vector*>* children, NodeTree* nullableParts); + void addChildren(NodeTree* parent, std::vector*>* children, NodeTree* nullableParts); void addStates(std::vector< State* >* stateSets, State* state, std::queue* toDo); void addStateReductionsToTable(State* state); bool fullyReducesToNull(ParseRule* rule); bool reducesToNull(ParseRule* rule); - bool reducesToNull(ParseRule* rule, std::vector avoidList); + bool reducesToNull(ParseRule* rule, std::vector avoidList); - bool belongsToFamily(NodeTree* node, std::vector*>* nodes); - bool arePacked(std::vector*> nodes); - bool isPacked(NodeTree* node); - void setPacked(NodeTree* node, bool isPacked); + bool belongsToFamily(NodeTree* node, std::vector*>* nodes); + bool arePacked(std::vector*> nodes); + bool isPacked(NodeTree* node); + void setPacked(NodeTree* node, bool isPacked); - NodeTree* getNullableParts(ParseRule* rule); - NodeTree* getNullableParts(ParseRule* rule, std::vector*> avoidList); - NodeTree* getNullableParts(Symbol* symbol); + NodeTree* getNullableParts(ParseRule* rule); + NodeTree* getNullableParts(ParseRule* rule, std::vector*> avoidList); + NodeTree* getNullableParts(Symbol symbol); - std::vector*> getPathEdges(std::vector*> path); + std::vector*> getPathEdges(std::vector*> path); - std::vector input; + std::vector input; GraphStructuredStack gss; //start node, lefthand side of the reduction, reduction length struct Reduction { NodeTree* from; - Symbol* symbol; + Symbol symbol; int length; - NodeTree* nullableParts; - NodeTree* label; + NodeTree* nullableParts; + NodeTree* label; } ; std::queue toReduce; //Node coming from, state going to std::queue< std::pair*, int> > toShift; - std::vector*, int> > SPPFStepNodes; + std::vector*, int> > SPPFStepNodes; - std::vector*> nullableParts; - std::map, bool> packedMap; + std::vector*> nullableParts; + std::map, bool> packedMap; }; #endif diff --git a/include/RemovalTransformation.h b/include/RemovalTransformation.h index cc2eacd..15d3586 100644 --- a/include/RemovalTransformation.h +++ b/include/RemovalTransformation.h @@ -1,5 +1,5 @@ -#ifndef ASTTRANSFORMATION_H -#define ASTTRANSFORMATION_H +#ifndef REMOVALTRANSFORMATION_H +#define REMOVALTRANSFORMATION_H #include #include @@ -38,7 +38,7 @@ NodeTree* RemovalTransformation::transform(NodeTree* from) { toProcess.pop(); std::vector*> children = node->getChildren(); for (int i = 0; i < children.size(); i++) { - if (*(children[i]->getData()) == *toRemove) + if (children[i]->getData() == toRemove) node->removeChild(children[i]); else toProcess.push(children[i]); diff --git a/include/Symbol.h b/include/Symbol.h index bc96f1b..3aa491f 100644 --- a/include/Symbol.h +++ b/include/Symbol.h @@ -10,28 +10,28 @@ #include #include -//Circular references -//class NodeTree; - class Symbol { public: + Symbol(); Symbol(std::string name, bool isTerminal); Symbol(std::string name, bool isTerminal, std::string value); - Symbol(std::string name, bool isTerminal, NodeTree* tree); + Symbol(std::string name, bool isTerminal, NodeTree* tree); ~Symbol(); - bool const operator==(const Symbol &other); - bool const operator!=(const Symbol &other); - std::string getName(); - std::string toString(); - Symbol* clone(); - void setSubTree(NodeTree* tree); - NodeTree* getSubTree(); + bool const operator==(const Symbol &other)const; + bool const operator!=(const Symbol &other)const; + bool const operator<(const Symbol &other)const; + + std::string getName() const; + std::string toString() const; + Symbol clone(); + void setSubTree(NodeTree* tree); + NodeTree* getSubTree(); bool isTerminal(); private: std::string name; std::string value; bool terminal; - NodeTree* subTree; + NodeTree* subTree; }; #endif \ No newline at end of file diff --git a/include/Table.h b/include/Table.h index bc7994b..fbfd3b9 100644 --- a/include/Table.h +++ b/include/Table.h @@ -11,19 +11,19 @@ class Table { public: Table(); ~Table(); - void setSymbols(Symbol* EOFSymbol, Symbol* nullSymbol); - void add(int stateNum, Symbol* tranSymbol, ParseAction* action); - void remove(int stateNum, Symbol* tranSymbol); - std::vector* get(int state, Symbol* token); - ParseAction* getShift(int state, Symbol* token); + void setSymbols(Symbol EOFSymbol, Symbol nullSymbol); + void add(int stateNum, Symbol tranSymbol, ParseAction* action); + void remove(int stateNum, Symbol tranSymbol); + std::vector* get(int state, Symbol token); + ParseAction* getShift(int state, Symbol token); std::string toString(); private: std::vector< std::vector< std::vector* >* > table; - std::vector symbolIndexVec; + std::vector symbolIndexVec; //The EOFSymbol, a pointer because of use in table, etc - Symbol* EOFSymbol; + Symbol EOFSymbol; //The nullSymbol, ditto with above. Also used in comparisons - Symbol* nullSymbol; + Symbol nullSymbol; }; #endif diff --git a/main.cpp b/main.cpp index a79fd02..639dac0 100644 --- a/main.cpp +++ b/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "NodeTree.h" #include "Symbol.h" @@ -10,12 +11,15 @@ #include "NodeTransformation.h" #include "RemovalTransformation.h" +#include "CollapseTransformation.h" +#include "ASTTransformation.h" +#include "ASTData.h" int main(int argc, char* argv[]) { std::ifstream programInFile, grammerInFile; - std::ofstream outFile, outFileTransformed; + std::ofstream outFile, outFileTransformed, outFileAST; programInFile.open(argv[1]); if (!programInFile.is_open()) { @@ -41,6 +45,12 @@ int main(int argc, char* argv[]) { return(1); } + outFileAST.open((std::string(argv[3]) + ".AST.dot").c_str()); + if (!outFileAST.is_open()) { + std::cout << "Probelm opening second output file " << std::string(argv[3]) + ".AST.dot" << "\n"; + return(1); + } + //Read the input file into a string std::string programInputFileString, grammerInputFileString; std::string line; @@ -77,7 +87,7 @@ int main(int argc, char* argv[]) { std::cout << "\nParsing" << std::endl; std::cout << programInputFileString << std::endl; - NodeTree* parseTree = parser.parseInput(programInputFileString); + NodeTree* parseTree = parser.parseInput(programInputFileString); if (parseTree) { //std::cout << parseTree->DOTGraphString() << std::endl; @@ -86,21 +96,46 @@ int main(int argc, char* argv[]) { std::cout << "ParseTree returned from parser is NULL!" << std::endl; } - NodeTransformation* removeWS = new RemovalTransformation(new Symbol("WS", false)); - NodeTree* noWhiteSpace = removeWS->transform(parseTree); - delete removeWS; + //Pre AST Transformations + std::vector*> preASTTransforms; + //Remove Transformations + preASTTransforms.push_back(new RemovalTransformation(Symbol("WS", false))); + preASTTransforms.push_back(new RemovalTransformation(Symbol("\\(", true))); + preASTTransforms.push_back(new RemovalTransformation(Symbol("\\)", true))); + //preASTTransforms.push_back(new RemovalTransformation(Symbol("/", true))); + preASTTransforms.push_back(new RemovalTransformation(Symbol("::", true))); + preASTTransforms.push_back(new RemovalTransformation(Symbol(";", true))); + preASTTransforms.push_back(new RemovalTransformation(Symbol("{", true))); + preASTTransforms.push_back(new RemovalTransformation(Symbol("}", true))); + //Collapse Transformations + preASTTransforms.push_back(new CollapseTransformation(Symbol("opt_typed_parameter_list", false))); + preASTTransforms.push_back(new CollapseTransformation(Symbol("opt_parameter_list", false))); + for (int i = 0; i < preASTTransforms.size(); i++) { + parseTree = preASTTransforms[i]->transform(parseTree); + } + preASTTransforms.erase(preASTTransforms.begin(), preASTTransforms.end()); - if (noWhiteSpace) { - outFileTransformed << noWhiteSpace->DOTGraphString() << std::endl; + NodeTree* AST = ASTTransformation().transform(parseTree); + //NodeTree* AST = (new ASTTransformation())->transform(parseTree); + + if (parseTree) { + outFileTransformed << parseTree->DOTGraphString() << std::endl; } else { std::cout << "Tree returned from transformation is NULL!" << std::endl; } + if (AST) { + outFileTransformed << AST->DOTGraphString() << std::endl; + } else { + std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl; + } + programInFile.close(); grammerInFile.close(); outFile.close(); outFileTransformed.close(); + outFileAST.close(); return(0); } diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 5080907..bee0a24 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -3,10 +3,9 @@ ASTData::ASTData(ASTType type, ValueType valueType) { this->type = type; this->valueType = valueType; - this->symbol = NULL; } -ASTData::ASTData(ASTType type, Symbol* symbol, ValueType valueType) { +ASTData::ASTData(ASTType type, Symbol symbol, ValueType valueType) { this->type = type; this->valueType = valueType; this->symbol = symbol; @@ -15,3 +14,7 @@ ASTData::ASTData(ASTType type, Symbol* symbol, ValueType valueType) { ASTData::~ASTData() { } + +std::string ASTData::toString() { + return "ASTData!"; +} diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 67f9321..564d28b 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -8,6 +8,6 @@ ASTTransformation::~ASTTransformation() { // } -virtual NodeTree* ASTTransformation::transform(NodeTree* from) { +NodeTree* ASTTransformation::transform(NodeTree* from) { return NULL; } diff --git a/src/GraphStructuredStack.cpp b/src/GraphStructuredStack.cpp index 0240eac..fb37d18 100644 --- a/src/GraphStructuredStack.cpp +++ b/src/GraphStructuredStack.cpp @@ -107,11 +107,11 @@ bool GraphStructuredStack::hasEdge(NodeTree* start, NodeTree* end) { return start->findChild(end) != -1; } -NodeTree* GraphStructuredStack::getEdge(NodeTree* start, NodeTree* end) { +NodeTree* GraphStructuredStack::getEdge(NodeTree* start, NodeTree* end) { return edges[std::make_pair(start, end)]; } -void GraphStructuredStack::addEdge(NodeTree* start, NodeTree* end, NodeTree* edge) { +void GraphStructuredStack::addEdge(NodeTree* start, NodeTree* end, NodeTree* edge) { start->addChild(end); end->addParent(start); edges[std::make_pair(start, end)] = edge; diff --git a/src/LALRParser.cpp b/src/LALRParser.cpp index 39ef40b..0f391ce 100644 --- a/src/LALRParser.cpp +++ b/src/LALRParser.cpp @@ -7,14 +7,14 @@ LALRParser::~LALRParser() { //Nothing to do in this version } -NodeTree* LALRParser::parseInput(std::string inputString) { +NodeTree* LALRParser::parseInput(std::string inputString) { lexer.setInput(inputString); - Symbol* token = lexer.next(); + Symbol token = lexer.next(); std::vector* actionList; ParseAction* action; stateStack.push(0); - symbolStack.push(new Symbol("INVALID", false)); + symbolStack.push(Symbol("INVALID", false)); while (true) { std::cout << "In state: " << intToString(stateStack.top()) << std::endl; @@ -28,18 +28,18 @@ NodeTree* LALRParser::parseInput(std::string inputString) { int rightSideLength = action->reduceRule->getRightSide().size(); //Keep track of symbols popped for parse tree - std::vector poppedSymbols; + std::vector poppedSymbols; for (int i = 0; i < rightSideLength; i++) { poppedSymbols.push_back(symbolStack.top()); stateStack.pop(); symbolStack.pop(); } std::reverse(poppedSymbols.begin(), poppedSymbols.end()); //To put in order - //Assign the new tree to the new Symbol - Symbol* newSymbol = action->reduceRule->getLeftSide()->clone(); - newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols)); + //Assign the new tree to the Symbol + Symbol newSymbol = action->reduceRule->getLeftSide(); + newSymbol.setSubTree(reduceTreeCombine(newSymbol, poppedSymbols)); symbolStack.push(newSymbol); - std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl; + std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top().toString() << std::endl; actionList = table.get(stateStack.top(), symbolStack.top()); action = (*(actionList))[actionList->size()-1]; @@ -50,7 +50,7 @@ NodeTree* LALRParser::parseInput(std::string inputString) { break; } case ParseAction::SHIFT: - std::cout << "Shift " << token->toString() << std::endl; + std::cout << "Shift " << token.toString() << std::endl; symbolStack.push(token); token = lexer.next(); @@ -58,11 +58,11 @@ NodeTree* LALRParser::parseInput(std::string inputString) { break; case ParseAction::ACCEPT: std::cout << "ACCEPTED!" << std::endl; - return(symbolStack.top()->getSubTree()); + return(symbolStack.top().getSubTree()); break; case ParseAction::REJECT: std::cout << "REJECTED!" << std::endl; - std::cout << "REJECTED Symbol was " << token->toString() << std::endl; + std::cout << "REJECTED Symbol was " << token.toString() << std::endl; return(NULL); break; default: diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 937787e..72055ea 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -22,11 +22,11 @@ void Lexer::addRegEx(std::string regExString) { regExs.push_back(new RegEx(regExString)); } -Symbol* Lexer::next() { +Symbol Lexer::next() { //std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <= input.length()-1) - return new Symbol("$EOF$", true); + return Symbol("$EOF$", true); int longestMatch = -1; RegEx* longestRegEx = NULL; std::string remainingString = input.substr(currentPosition,input.length()-1); @@ -42,10 +42,10 @@ Symbol* Lexer::next() { std::string eatenString = input.substr(currentPosition, longestMatch+1); currentPosition += longestMatch + 1; //std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <getPattern(), true, eatenString); + return Symbol(longestRegEx->getPattern(), true, eatenString); } else { //std::cout << "Found no applicable regex" << std::endl; //std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl; - return NULL; + return Symbol(); } } \ No newline at end of file diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index d8008de..ea96c50 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -2,11 +2,10 @@ ParseRule::ParseRule() { pointerIndex = 0; - leftHandle = NULL; lookahead = NULL; } -ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector &rightSide, std::vector* lookahead) { +ParseRule::ParseRule(Symbol leftHandle, int pointerIndex, std::vector &rightSide, std::vector* lookahead) { this->leftHandle = leftHandle; this->pointerIndex = pointerIndex; this->rightSide = rightSide; @@ -33,35 +32,35 @@ ParseRule* ParseRule::clone() { return( new ParseRule(leftHandle, pointerIndex, rightSide, lookahead) ); } -void ParseRule::setLeftHandle(Symbol* leftHandle) { +void ParseRule::setLeftHandle(Symbol leftHandle) { this->leftHandle = leftHandle; } -void ParseRule::appendToRight(Symbol* appendee) { +void ParseRule::appendToRight(Symbol appendee) { rightSide.push_back(appendee); } -Symbol* ParseRule::getLeftSide() { +Symbol ParseRule::getLeftSide() { return leftHandle; } -void ParseRule::setRightSide(std::vector rightSide) { +void ParseRule::setRightSide(std::vector rightSide) { this->rightSide = rightSide; } -std::vector ParseRule::getRightSide() { +std::vector ParseRule::getRightSide() { return rightSide; } -Symbol* ParseRule::getAtNextIndex() { +Symbol ParseRule::getAtNextIndex() { if (pointerIndex >= rightSide.size()) - return NULL; + return Symbol(); return rightSide[pointerIndex]; } -Symbol* ParseRule::getAtIndex() { +Symbol ParseRule::getAtIndex() { if (pointerIndex < 1) - return NULL; + return Symbol(); return rightSide[pointerIndex-1]; } @@ -85,15 +84,15 @@ bool ParseRule::isAtEnd() { return pointerIndex == rightSide.size(); } -void ParseRule::setLookahead(std::vector* lookahead) { +void ParseRule::setLookahead(std::vector* lookahead) { this->lookahead = lookahead; } -void ParseRule::addLookahead(std::vector* lookahead) { - for (std::vector::size_type i = 0; i < lookahead->size(); i++) { +void ParseRule::addLookahead(std::vector* lookahead) { + for (std::vector::size_type i = 0; i < lookahead->size(); i++) { bool alreadyIn = false; - for (std::vector::size_type j = 0; j < this->lookahead->size(); j++) { - if (*((*lookahead)[i]) == *((*(this->lookahead))[j])) { + for (std::vector::size_type j = 0; j < this->lookahead->size(); j++) { + if ((*lookahead)[i] == (*(this->lookahead))[j]) { alreadyIn = true; break; } @@ -103,23 +102,23 @@ void ParseRule::addLookahead(std::vector* lookahead) { } } -std::vector* ParseRule::getLookahead() { +std::vector* ParseRule::getLookahead() { return lookahead; } std::string ParseRule::toString() { - std::string concat = leftHandle->toString() + " -> "; + std::string concat = leftHandle.toString() + " -> "; for (int i = 0; i < rightSide.size(); i++) { if (i == pointerIndex) concat += "(*) "; - concat += rightSide[i]->toString() + " "; + concat += rightSide[i].toString() + " "; } if (pointerIndex >= rightSide.size()) concat += "(*)"; if (lookahead != NULL) { concat += "**"; - for (std::vector::size_type i = 0; i < lookahead->size(); i++) - concat += (*lookahead)[i]->toString(); + for (std::vector::size_type i = 0; i < lookahead->size(); i++) + concat += (*lookahead)[i].toString(); concat += "**"; } return(concat); @@ -128,7 +127,7 @@ std::string ParseRule::toString() { std::string ParseRule::toDOT() { std::string concat = ""; for (int i = 0; i < rightSide.size(); i++) { - concat += leftHandle->toString() + " -> " + rightSide[i]->toString() + ";\n"; + concat += leftHandle.toString() + " -> " + rightSide[i].toString() + ";\n"; } return(concat); } diff --git a/src/Parser.cpp b/src/Parser.cpp index 1a81821..048416b 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1,21 +1,17 @@ #include "Parser.h" -Parser::Parser() { - EOFSymbol = new Symbol("$EOF$", true); - nullSymbol = new Symbol("$NULL$", true); +Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true){ table.setSymbols(EOFSymbol, nullSymbol); } Parser::~Parser() { - delete EOFSymbol; - delete nullSymbol; } -Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) { - Symbol* symbol; +Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) { + Symbol symbol; std::pair entry = std::make_pair(symbolString, isTerminal); if (symbols.find(entry) == symbols.end()) { - symbol = new Symbol(symbolString, isTerminal); + symbol = Symbol(symbolString, isTerminal); symbols[entry] = symbol; } else { symbol = symbols[entry]; @@ -31,10 +27,10 @@ void Parser::loadGrammer(std::string grammerInputString) { while(currToken != "") { //Load the left of the rule ParseRule* currentRule = new ParseRule(); - Symbol* leftSide = getOrAddSymbol(currToken, false); //Left handle is never a terminal + Symbol leftSide = getOrAddSymbol(currToken, false); //Left handle is never a terminal currentRule->setLeftHandle(leftSide); reader.word(); //Remove the = - //Add the right side, adding new Symbols to symbol map. + //Add the right side, adding Symbols to symbol map. currToken = reader.word(); while (currToken != ";") { @@ -84,7 +80,7 @@ void Parser::createStateSet() { //Set the first state's basis to be the goal rule with lookahead EOF ParseRule* goalRule = loadedGrammer[0]->clone(); - std::vector* goalRuleLookahead = new std::vector(); + std::vector* goalRuleLookahead = new std::vector(); goalRuleLookahead->push_back(EOFSymbol); goalRule->setLookahead(goalRuleLookahead); State* zeroState = new State(0, goalRule); @@ -111,38 +107,38 @@ int Parser::stateNum(State* state) { return -1; } -std::vector* Parser::firstSet(Symbol* token) { - std::vector avoidList; +std::vector* Parser::firstSet(Symbol token) { + std::vector avoidList; return firstSet(token, avoidList); } -std::vector* Parser::firstSet(Symbol* token, std::vector avoidList) { +std::vector* Parser::firstSet(Symbol token, std::vector avoidList) { //If we've already done this token, don't do it again - for (std::vector::size_type i = 0; i < avoidList.size(); i++) - if (*(avoidList[i]) == *token) { - return new std::vector(); + for (std::vector::size_type i = 0; i < avoidList.size(); i++) + if (avoidList[i] == token) { + return new std::vector(); } avoidList.push_back(token); - std::vector* first = new std::vector(); + std::vector* first = new std::vector(); //First, if the symbol is a terminal, than it's first set is just itself. - if (token->isTerminal()) { + if (token.isTerminal()) { first->push_back(token); return(first); } //Otherwise.... //Ok, to make a first set, go through the grammer, if the token it's left side, add it's production's first token's first set. //If that one includes mull, do the next one too (if it exists). - Symbol* rightToken = NULL; - std::vector* recursiveFirstSet = NULL; + Symbol rightToken; + std::vector* recursiveFirstSet = NULL; for (std::vector::size_type i = 0; i < loadedGrammer.size(); i++) { - if (*token == *(loadedGrammer[i]->getLeftSide())) { + if (token == loadedGrammer[i]->getLeftSide()) { //Loop through the rule adding first sets for each token if the previous token contained NULL bool recFirstSetHasNull = false; int j = 0; do { rightToken = loadedGrammer[i]->getRightSide()[j]; //Get token of the right side of this rule - if (rightToken->isTerminal()) { - recursiveFirstSet = new std::vector(); + if (rightToken.isTerminal()) { + recursiveFirstSet = new std::vector(); recursiveFirstSet->push_back(rightToken); } else { //Add the entire set @@ -151,8 +147,8 @@ std::vector* Parser::firstSet(Symbol* token, std::vector avoid first->insert(first->end(), recursiveFirstSet->begin(), recursiveFirstSet->end()); //Check to see if the current recursiveFirstSet contains NULL, if so, then go through again with the next token. (if there is one) recFirstSetHasNull = false; - for (std::vector::size_type k = 0; k < recursiveFirstSet->size(); k++) { - if ((*(*recursiveFirstSet)[k]) == *nullSymbol) { + for (std::vector::size_type k = 0; k < recursiveFirstSet->size(); k++) { + if ((*recursiveFirstSet)[k] == nullSymbol) { recFirstSetHasNull = true; } } @@ -165,20 +161,20 @@ std::vector* Parser::firstSet(Symbol* token, std::vector avoid } //Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. -std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { +std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { //Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end) rule = rule->clone(); rule->advancePointer(); //Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one - std::vector* followSet = new std::vector(); - std::vector* symbolFirstSet; + std::vector* followSet = new std::vector(); + std::vector* symbolFirstSet; bool symbolFirstSetHasNull = true; while (symbolFirstSetHasNull && !rule->isAtEnd()) { symbolFirstSetHasNull = false; symbolFirstSet = firstSet(rule->getAtNextIndex()); - for (std::vector::size_type i = 0; i < symbolFirstSet->size(); i++) { - if (*((*symbolFirstSet)[i]) == *nullSymbol) { + for (std::vector::size_type i = 0; i < symbolFirstSet->size(); i++) { + if ((*symbolFirstSet)[i] == nullSymbol) { symbolFirstSetHasNull = true; symbolFirstSet->erase(symbolFirstSet->begin()+i); break; @@ -192,11 +188,11 @@ std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { symbolFirstSet = rule->getLookahead(); followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end()); } - std::vector* followSetReturn = new std::vector(); - for (std::vector::size_type i = 0; i < followSet->size(); i++) { + std::vector* followSetReturn = new std::vector(); + for (std::vector::size_type i = 0; i < followSet->size(); i++) { bool alreadyIn = false; - for (std::vector::size_type j = 0; j < followSetReturn->size(); j++) - if (*((*followSet)[i]) == *((*followSetReturn)[j])) { + for (std::vector::size_type j = 0; j < followSetReturn->size(); j++) + if ((*followSet)[i] == (*followSetReturn)[j]) { alreadyIn = true; break; } @@ -216,7 +212,7 @@ void Parser::closure(State* state) { for (std::vector::size_type j = 0; j < loadedGrammer.size(); j++) { //If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side ParseRule* currentGramRule = loadedGrammer[j]->clone(); - if ( !currentStateRule->isAtEnd() && *(currentStateRule->getAtNextIndex()) == *(currentGramRule->getLeftSide())) { + if ( !currentStateRule->isAtEnd() && currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) { //std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl; //Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. //std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl; @@ -259,7 +255,7 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu //If not, create it. bool symbolAlreadyInState = false; for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) { - if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { + if (newStates[j]->basis[0]->getAtIndex() == advancedRule->getAtIndex()) { symbolAlreadyInState = true; //So now check to see if this exact rule is in this state if (!newStates[j]->containsRule(advancedRule)) @@ -276,21 +272,21 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu //Also add any completed rules as reduces in the action table //See if reduce //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... - std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); + std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); if ((*currStateTotal)[i]->isAtEnd()) { - for (std::vector::size_type j = 0; j < lookahead->size(); j++) + for (std::vector::size_type j = 0; j < lookahead->size(); j++) table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); - } else if (*((*currStateTotal)[i]->getAtNextIndex()) == *nullSymbol) { + } else if ((*currStateTotal)[i]->getAtNextIndex() == nullSymbol) { //If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side of length 0. (so we don't pop off stack) ParseRule* nullRule = (*currStateTotal)[i]->clone(); - nullRule->setRightSide(* new std::vector()); - for (std::vector::size_type j = 0; j < lookahead->size(); j++) + nullRule->setRightSide(* new std::vector()); + for (std::vector::size_type j = 0; j < lookahead->size(); j++) table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule)); } } //Put all our new states in the set of states only if they're not already there. bool stateAlreadyInAllStates = false; - Symbol* currStateSymbol; + Symbol currStateSymbol; for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { stateAlreadyInAllStates = false; currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex(); @@ -327,13 +323,13 @@ std::string Parser::tableToString() { //parseInput is now pure virtual -NodeTree* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector &symbols) { - NodeTree* newTree = new NodeTree(newSymbol->getName(), newSymbol); - for (std::vector::size_type i = 0; i < symbols.size(); i++) { - if (symbols[i]->isTerminal()) - newTree->addChild(new NodeTree(symbols[i]->getName(), symbols[i])); +NodeTree* Parser::reduceTreeCombine(Symbol newSymbol, std::vector &symbols) { + NodeTree* newTree = new NodeTree(newSymbol.getName(), newSymbol); + for (std::vector::size_type i = 0; i < symbols.size(); i++) { + if (symbols[i].isTerminal()) + newTree->addChild(new NodeTree(symbols[i].getName(), symbols[i])); else - newTree->addChild(symbols[i]->getSubTree()); + newTree->addChild(symbols[i].getSubTree()); } return(newTree); } diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 4fd4280..e58f825 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -8,7 +8,7 @@ RNGLRParser::~RNGLRParser() { // } -NodeTree* RNGLRParser::parseInput(std::string inputString) { +NodeTree* RNGLRParser::parseInput(std::string inputString) { //Check for no tokens bool accepting = false; @@ -24,23 +24,17 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { } else { std::cout << "Rejected, no input (with no accepting state)" << std::endl; } - return new NodeTree(); + return new NodeTree(); } lexer.setInput(inputString); //Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation. //It could be converted to on-line later. - Symbol* currentToken = lexer.next(); + Symbol currentToken = lexer.next(); input.push_back(currentToken); - while (*currentToken != *EOFSymbol) { - //std::cout << EOFSymbol->toString() << " " << currentToken->toString() << std::endl; + while (currentToken != EOFSymbol) { currentToken = lexer.next(); - if (currentToken != NULL) { - input.push_back(currentToken); - } else { - std::cout << "Rejected, lexer unable to fully tokenize sentence" << std::endl; - return new NodeTree(); - } + input.push_back(currentToken); } std::cout << "\nDone with Lexing\n" << std::endl; @@ -78,11 +72,11 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { // std::cout << "Checking if frontier " << i << " is empty" << std::endl; if (gss.frontierIsEmpty(i)) { std::cout << "Frontier " << i << " is empty." << std::endl; - std::cout << "Failed on " << input[i]->toString() << std::endl; + std::cout << "Failed on " << input[i].toString() << std::endl; std::cout << "Nearby is:" << std::endl; int range = 5; for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) - std::cout << input[j]->toString() << " "; + std::cout << input[j].toString() << " "; std::cout << std::endl; break; } @@ -124,7 +118,7 @@ void RNGLRParser::reducer(int i) { std::vector*> currentPath = (*paths)[j]; //Get the edges for the current path - std::vector*> pathEdges = getPathEdges(currentPath); + std::vector*> pathEdges = getPathEdges(currentPath); std::reverse(pathEdges.begin(), pathEdges.end()); //If the reduction length is 0, label as passed in is null if (reduction.length != 0) @@ -132,24 +126,24 @@ void RNGLRParser::reducer(int i) { //The end of the current path NodeTree* currentReached = currentPath[currentPath.size()-1]; - std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.symbol->toString() << std::endl; + std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl; int toState = table.getShift(currentReached->getData(), reduction.symbol)->shiftState; //If reduction length is 0, then we make the new label the appropriate nullable parts - NodeTree* newLabel = NULL; + NodeTree* newLabel = NULL; if (reduction.length == 0) { newLabel = reduction.nullableParts; } else { //Otherwise, we create the new label if we haven't already int reachedFrontier = gss.getContainingFrontier(currentReached); - for (std::vector*, int> >::size_type k = 0; k < SPPFStepNodes.size(); k++) { - if ( SPPFStepNodes[k].second == reachedFrontier && *(SPPFStepNodes[k].first->getData()) == *(reduction.symbol)) { + for (std::vector*, int> >::size_type k = 0; k < SPPFStepNodes.size(); k++) { + if ( SPPFStepNodes[k].second == reachedFrontier && SPPFStepNodes[k].first->getData() == reduction.symbol) { newLabel = SPPFStepNodes[k].first; break; } } if (!newLabel) { - newLabel = new NodeTree("frontier: " + intToString(reachedFrontier), reduction.symbol); + newLabel = new NodeTree("frontier: " + intToString(reachedFrontier), reduction.symbol); SPPFStepNodes.push_back(std::make_pair(newLabel, reachedFrontier)); } } @@ -198,7 +192,7 @@ void RNGLRParser::reducer(int i) { void RNGLRParser::shifter(int i) { if (i != input.size()-1) { std::queue< std::pair*, int> > nextShifts; - NodeTree* newLabel = new NodeTree("frontier: " + intToString(i), input[i]); + NodeTree* newLabel = new NodeTree("frontier: " + intToString(i), input[i]); while (!toShift.empty()) { std::pair*, int> shift = toShift.front(); toShift.pop(); @@ -239,7 +233,7 @@ void RNGLRParser::shifter(int i) { } } -void RNGLRParser::addChildren(NodeTree* parent, std::vector*>* children, NodeTree* nullableParts) { +void RNGLRParser::addChildren(NodeTree* parent, std::vector*>* children, NodeTree* nullableParts) { if (nullableParts) children->push_back(nullableParts); @@ -248,14 +242,14 @@ void RNGLRParser::addChildren(NodeTree* parent, std::vectoraddChildren(children); } else { if (!arePacked(parent->getChildren())) { - NodeTree* subParent = new NodeTree("AmbiguityPackInner", NULL); + NodeTree* subParent = new NodeTree("AmbiguityPackInner", Symbol("AmbiguityPackInner", true)); setPacked(subParent, true); - std::vector*> tmp = parent->getChildren(); + std::vector*> tmp = parent->getChildren(); subParent->addChildren(&tmp); parent->clearChildren(); parent->addChild(subParent); } - NodeTree* t = new NodeTree("AmbiguityPackOuter", NULL); + NodeTree* t = new NodeTree("AmbiguityPackOuter", Symbol("AmbiguityPackInner", true)); setPacked(t, true); parent->addChild(t); t->addChildren(children); @@ -263,12 +257,12 @@ void RNGLRParser::addChildren(NodeTree* parent, std::vector* node, std::vector*>* nodes) { +bool RNGLRParser::belongsToFamily(NodeTree* node, std::vector*>* nodes) { //std::cout << "Checking " << node->getData()->toString() << "'s family" << std::endl; - std::vector*> children = node->getChildren(); - for (std::vector*>::size_type i = 0; i < nodes->size(); i++) { + std::vector*> children = node->getChildren(); + for (std::vector*>::size_type i = 0; i < nodes->size(); i++) { bool containsOne = false; - for (std::vector*>::size_type j = 0; j < children.size(); j++) { + for (std::vector*>::size_type j = 0; j < children.size(); j++) { //Not sure where null comes from. For right now, just check to be sure we don't segfault if ((*nodes)[i] == children[j] || ( (*nodes)[i] != NULL && children[j] != NULL && (*(*nodes)[i]) == *(children[j]) )) { containsOne = true; @@ -282,18 +276,18 @@ bool RNGLRParser::belongsToFamily(NodeTree* node, std::vector*> nodes) { +bool RNGLRParser::arePacked(std::vector*> nodes) { bool packed = true; - for (std::vector*>::size_type i = 0; i < nodes.size(); i++) + for (std::vector*>::size_type i = 0; i < nodes.size(); i++) packed &= packedMap[*(nodes[i])]; return packed; } -bool RNGLRParser::isPacked(NodeTree* node) { +bool RNGLRParser::isPacked(NodeTree* node) { return packedMap[*node]; } -void RNGLRParser::setPacked(NodeTree* node, bool isPacked) { +void RNGLRParser::setPacked(NodeTree* node, bool isPacked) { packedMap[*node] = isPacked; } @@ -315,7 +309,7 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std: //If not, create it. bool symbolAlreadyInState = false; for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) { - if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) { + if (newStates[j]->basis[0]->getAtIndex() == advancedRule->getAtIndex()) { symbolAlreadyInState = true; //Add rule to state, combining with idenical rule except lookahead if exists newStates[j]->addRuleCombineLookahead(advancedRule); @@ -331,7 +325,7 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std: } //Put all our new states in the set of states only if they're not already there. bool stateAlreadyInAllStates = false; - Symbol* currStateSymbol; + Symbol currStateSymbol; for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) { stateAlreadyInAllStates = false; currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex(); @@ -367,9 +361,9 @@ void RNGLRParser::addStateReductionsToTable(State* state) { for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { //See if reduce //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... - std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); + std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); if ((*currStateTotal)[i]->isAtEnd()) { - for (std::vector::size_type j = 0; j < lookahead->size(); j++) + for (std::vector::size_type j = 0; j < lookahead->size(); j++) table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); //If this has an appropriate ruduction to null, get the reduce trees out } else if (reducesToNull((*currStateTotal)[i])) { @@ -377,7 +371,7 @@ void RNGLRParser::addStateReductionsToTable(State* state) { //It used to be that if is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side that is equal to //the part that we've already gone through in the rule. (so we don't pop extra off stack) //Now we use the same rule and make sure that the index location is used - for (std::vector::size_type j = 0; j < lookahead->size(); j++) + for (std::vector::size_type j = 0; j < lookahead->size(); j++) table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); } } @@ -388,33 +382,33 @@ bool RNGLRParser::fullyReducesToNull(ParseRule* rule) { } bool RNGLRParser::reducesToNull(ParseRule* rule) { - std::vector avoidList; + std::vector avoidList; return reducesToNull(rule, avoidList); } -bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector avoidList) { +bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector avoidList) { //If the rule is completed and not null, it doesn't reduce to null, it's just completed. if (rule->isAtEnd() && rule->getRightSize() != 0) return false; - for (std::vector::size_type i = 0; i < avoidList.size(); i++) - if (*(rule->getLeftSide()) == *(avoidList[i])) + for (std::vector::size_type i = 0; i < avoidList.size(); i++) + if (rule->getLeftSide() == avoidList[i]) return false; avoidList.push_back(rule->getLeftSide()); - std::vector rightSide = rule->getRightSide(); + std::vector rightSide = rule->getRightSide(); bool reduces = true; - for (std::vector::size_type i = rule->getIndex(); i < rightSide.size(); i++) { - if (*rightSide[i] == *nullSymbol) + for (std::vector::size_type i = rule->getIndex(); i < rightSide.size(); i++) { + if (rightSide[i] == nullSymbol) continue; - if (rightSide[i]->isTerminal()) { + if (rightSide[i].isTerminal()) { reduces = false; break; } bool subSymbolReduces = false; for (std::vector::size_type j = 0; j < loadedGrammer.size(); j++) { - if (*(loadedGrammer[j]->getLeftSide()) == *(rightSide[i])) { + if (loadedGrammer[j]->getLeftSide() == rightSide[i]) { if(reducesToNull(loadedGrammer[j], avoidList)) { subSymbolReduces = true; break; @@ -429,32 +423,32 @@ bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector avoidList) return reduces; } -NodeTree* RNGLRParser::getNullableParts(ParseRule* rule) { - return getNullableParts(rule, std::vector*>()); +NodeTree* RNGLRParser::getNullableParts(ParseRule* rule) { + return getNullableParts(rule, std::vector*>()); } -NodeTree* RNGLRParser::getNullableParts(ParseRule* rule, std::vector*> avoidList) { +NodeTree* RNGLRParser::getNullableParts(ParseRule* rule, std::vector*> avoidList) { if (reducesToNull(rule)) { //std::cout << "Reduces to null so adding parts " << rule->toString() << std::endl; - Symbol* symbol = rule->getLeftSide(); - NodeTree* symbolNode = new NodeTree(symbol->getName(), symbol); - if (*(rule->getAtNextIndex()) == *nullSymbol) { - symbolNode->addChild(new NodeTree(nullSymbol->getName(), nullSymbol)); + Symbol symbol = rule->getLeftSide(); + NodeTree* symbolNode = new NodeTree(symbol.getName(), symbol); + if (rule->getAtNextIndex() == nullSymbol) { + symbolNode->addChild(new NodeTree(nullSymbol.getName(), nullSymbol)); } else { //Find recursively ParseRule* iterate = rule->clone(); while (!iterate->isAtEnd()) { //Check to see if we've done this symbol already, if so use it - for (std::vector*>::size_type i = 0; i < avoidList.size(); i++) { - if (*(iterate->getAtNextIndex()) == *(avoidList[i]->getData())) { + for (std::vector*>::size_type i = 0; i < avoidList.size(); i++) { + if (iterate->getAtNextIndex() == avoidList[i]->getData()) { symbolNode->addChild(avoidList[i]); break; } } //We haven't so do it recursively for (std::vector::size_type i = 0; i < loadedGrammer.size(); i++) { - if (fullyReducesToNull(loadedGrammer[i]) && *(iterate->getAtNextIndex()) == *(loadedGrammer[i]->getLeftSide())) { - NodeTree* symbolTree = getNullableParts(loadedGrammer[i], avoidList); + if (fullyReducesToNull(loadedGrammer[i]) && iterate->getAtNextIndex() == loadedGrammer[i]->getLeftSide()) { + NodeTree* symbolTree = getNullableParts(loadedGrammer[i], avoidList); avoidList.push_back(symbolTree); symbolNode->addChild(symbolTree); } @@ -467,12 +461,12 @@ NodeTree* RNGLRParser::getNullableParts(ParseRule* rule, std::vector* RNGLRParser::getNullableParts(Symbol* symbol) { - return new NodeTree("CRAZY_SYMBOL", nullSymbol); +NodeTree* RNGLRParser::getNullableParts(Symbol symbol) { + return new NodeTree("CRAZY_SYMBOL", nullSymbol); } -std::vector*> RNGLRParser::getPathEdges(std::vector*> path) { - std::vector*> pathEdges; +std::vector*> RNGLRParser::getPathEdges(std::vector*> path) { + std::vector*> pathEdges; for (std::vector*>::size_type i = 0; i < path.size()-1; i++) pathEdges.push_back(gss.getEdge(path[i], path[i+1])); return pathEdges; diff --git a/src/Symbol.cpp b/src/Symbol.cpp index 116e38f..5acdb2c 100644 --- a/src/Symbol.cpp +++ b/src/Symbol.cpp @@ -1,5 +1,12 @@ #include "Symbol.h" +Symbol::Symbol() { + this->name = "UninitlizedSymbol"; + this->terminal = false; + this->subTree = NULL; + value = "NoValue"; +} + Symbol::Symbol(std::string name, bool isTerminal) { this->name = name; this->terminal = isTerminal; @@ -14,7 +21,7 @@ Symbol::Symbol(std::string name, bool isTerminal, std::string value) { this->value = value; } -Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) { +Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) { this->name = name; this->terminal = isTerminal; this->subTree = tree; @@ -24,31 +31,31 @@ Symbol::~Symbol() { } -const bool Symbol::operator==(const Symbol &other) { +const bool Symbol::operator==(const Symbol &other) const { return( name == other.name && terminal == other.terminal); } -const bool Symbol::operator!=(const Symbol &other) { +const bool Symbol::operator!=(const Symbol &other) const { return(!this->operator==(other)); } -std::string Symbol::getName() { +const bool Symbol::operator<(const Symbol &other) const { + return name < other.getName(); +} + +std::string Symbol::getName() const { return(name); } -std::string Symbol::toString() { +std::string Symbol::toString() const { return(name + (terminal ? " " + value : "")); } -Symbol* Symbol::clone() { - return new Symbol(name, terminal, subTree); -} - -void Symbol::setSubTree(NodeTree* tree) { +void Symbol::setSubTree(NodeTree* tree) { subTree = tree; } -NodeTree* Symbol::getSubTree() { +NodeTree* Symbol::getSubTree() { return subTree; } diff --git a/src/Table.cpp b/src/Table.cpp index abe6104..690ebd2 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -8,12 +8,12 @@ Table::~Table() { // } -void Table::setSymbols(Symbol* EOFSymbol, Symbol* nullSymbol) { +void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) { this->EOFSymbol = EOFSymbol; this->nullSymbol = nullSymbol; } -void Table::add(int stateNum, Symbol* tranSymbol, ParseAction* action) { +void Table::add(int stateNum, Symbol tranSymbol, ParseAction* action) { //If this is the first time we're adding to the table, add the EOF character if (symbolIndexVec.size() == 0) @@ -28,8 +28,8 @@ void Table::add(int stateNum, Symbol* tranSymbol, ParseAction* action) { //find out what index this symbol is on int symbolIndex = -1; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { - if ( *(symbolIndexVec[i]) == *tranSymbol ) { + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( symbolIndexVec[i] == tranSymbol ) { //Has been found symbolIndex = i; break; @@ -79,11 +79,11 @@ void Table::add(int stateNum, Symbol* tranSymbol, ParseAction* action) { } } -void Table::remove(int stateNum, Symbol* tranSymbol) { +void Table::remove(int stateNum, Symbol tranSymbol) { //find out what index this symbol is on int symbolIndex = -1; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { - if ( *(symbolIndexVec[i]) == *tranSymbol ) { + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( symbolIndexVec[i] == tranSymbol ) { //Has been found symbolIndex = i; break; @@ -92,21 +92,21 @@ void Table::remove(int stateNum, Symbol* tranSymbol) { (*(table[stateNum]))[symbolIndex] = NULL; } -std::vector* Table::get(int state, Symbol* token) { +std::vector* Table::get(int state, Symbol token) { int symbolIndex = -1; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { - if ( *(symbolIndexVec[i]) == *token) { + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) { + if ( symbolIndexVec[i] == token) { symbolIndex = i; break; } } if (symbolIndex == -1) { - std::cout << "Unrecognized symbol: " << token->toString() << ", cannot get from table!" << std::endl; + std::cout << "Unrecognized symbol: " << token.toString() << ", cannot get from table!" << std::endl; return NULL; } - std::cout << "Get for state: " << state << ", and Symbol: " << token->toString() << std::endl; + std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl; if (state < 0 || state >= table.size()) { std::cout << "State bad: " << state << std::endl; return NULL; @@ -115,7 +115,7 @@ std::vector* Table::get(int state, Symbol* token) { std::vector* action = NULL; if (symbolIndex < 0 || symbolIndex >= table[state]->size()) { - std::cout << "Symbol bad for this state: " << token->toString() << ". This is a reject." << std::endl; + std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl; } else { action = (*(table[state]))[symbolIndex]; } @@ -144,7 +144,7 @@ std::vector* Table::get(int state, Symbol* token) { return (action); } -ParseAction* Table::getShift(int state, Symbol* token) { +ParseAction* Table::getShift(int state, Symbol token) { std::vector* actions = get(state, token); ParseAction* shift = NULL; for (int i = 0; i < actions->size(); i++) { @@ -158,8 +158,8 @@ ParseAction* Table::getShift(int state, Symbol* token) { std::string Table::toString() { std::string concat = ""; - for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) - concat += "\t" + symbolIndexVec[i]->toString(); + for (std::vector::size_type i = 0; i < symbolIndexVec.size(); i++) + concat += "\t" + symbolIndexVec[i].toString(); concat += "\n"; for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) {