From 0c651a5d9e4bb507d08c3835fd6884e701d91b11 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Thu, 13 Jun 2013 14:25:10 -0400 Subject: [PATCH] Added EOF symbol, making the getTable() saner for ACCEPT and lookahead support in ParseRule --- CMakeLists.txt | 2 +- include/Lexer.h | 20 ++++++++++++++++++++ include/ParseRule.h | 3 ++- include/Parser.h | 7 ++----- include/util.h | 4 ++++ main.cpp | 3 ++- src/Lexer.cpp | 24 ++++++++++++++++++++++++ src/ParseRule.cpp | 3 ++- src/Parser.cpp | 22 +++++++++++----------- 9 files changed, 68 insertions(+), 20 deletions(-) create mode 100644 include/Lexer.h create mode 100644 src/Lexer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f215870..aaf817a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/Lexer.h b/include/Lexer.h new file mode 100644 index 0000000..d8b1b7b --- /dev/null +++ b/include/Lexer.h @@ -0,0 +1,20 @@ +#ifndef LEXER_H +#define LEXER_H + +#include "util.h" +#include "StringReader.h" +#include "Symbol.h" + +#include + +class Lexer { + public: + Lexer(); + Lexer(std::string inputString); + ~Lexer(); + void setInput(std::string inputString); + Symbol* next(); + private: + StringReader reader; +}; +#endif \ No newline at end of file diff --git a/include/ParseRule.h b/include/ParseRule.h index e4a3470..9f05748 100644 --- a/include/ParseRule.h +++ b/include/ParseRule.h @@ -14,7 +14,7 @@ class ParseRule { public: ParseRule(); - ParseRule(Symbol* leftHandle, int pointerIndex, std::vector &rightSide); + ParseRule(Symbol* leftHandle, int pointerIndex, std::vector &rightSide, Symbol* lookahead = NULL); ~ParseRule(); bool const operator==(const ParseRule &other); @@ -41,6 +41,7 @@ class ParseRule { private: int pointerIndex; Symbol* leftHandle; + Symbol* lookahead; std::vector rightSide; }; diff --git a/include/Parser.h b/include/Parser.h index b70c99d..376e2cb 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -1,16 +1,13 @@ #ifndef PARSER_H #define PARSER_H -#ifndef NULL -#define NULL 0 -#endif - #include "util.h" #include "ParseRule.h" #include "ParseAction.h" #include "Symbol.h" #include "State.h" #include "StringReader.h" +#include "Lexer.h" #include "NodeTree.h" #include @@ -32,7 +29,7 @@ class Parser { std::string stateSetToString(); void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action); ParseAction* getTable(int state, Symbol* token); - NodeTree* parseInput(std::string inputString); + NodeTree* parseInput(Lexer* lexer); std::string grammerToString(); std::string grammerToDOT(); diff --git a/include/util.h b/include/util.h index 0a79c2d..6192b1b 100644 --- a/include/util.h +++ b/include/util.h @@ -1,6 +1,10 @@ #ifndef UTIL_H #define UTIL_H +#ifndef NULL +#define NULL 0 +#endif + #include #include diff --git a/main.cpp b/main.cpp index 5f992b7..e46ad53 100644 --- a/main.cpp +++ b/main.cpp @@ -1,4 +1,5 @@ #include "NodeTree.h" +#include "Lexer.h" #include "Parser.h" #include #include @@ -67,7 +68,7 @@ int main(int argc, char* argv[]) { //outFile << parser.grammerToDOT() << std::endl; std::cout << programInputFileString << std::endl; - NodeTree* parseTree = parser.parseInput(programInputFileString); + NodeTree* parseTree = parser.parseInput(new Lexer(programInputFileString)); if (parseTree) { std::cout << parseTree->DOTGraphString() << std::endl; diff --git a/src/Lexer.cpp b/src/Lexer.cpp new file mode 100644 index 0000000..964104a --- /dev/null +++ b/src/Lexer.cpp @@ -0,0 +1,24 @@ +#include "Lexer.h" + +Lexer::Lexer() { + //Do nothing +} + +Lexer::Lexer(std::string inputString) { + reader.setString(inputString); +} + +Lexer::~Lexer() { + //No cleanup necessary +} + +void Lexer::setInput(std::string inputString) { + reader.setString(inputString); +} + +Symbol* Lexer::next() { + std::string token = reader.word(); + if (token != "") + return new Symbol("\""+token+"\"", true); + return new Symbol("$EOF$", false); +} \ No newline at end of file diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index cf69511..52b501e 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -5,10 +5,11 @@ ParseRule::ParseRule() { leftHandle = NULL; } -ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector &rightSide) { +ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector &rightSide, Symbol* lookahead) { this->leftHandle = leftHandle; this->pointerIndex = pointerIndex; this->rightSide = rightSide; + this->lookahead = lookahead; } ParseRule::~ParseRule() { diff --git a/src/Parser.cpp b/src/Parser.cpp index 25ea7ee..c00beb4 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -162,6 +162,10 @@ std::string Parser::stateSetToString() { void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) { + //If this is the first time we're adding to the table, add the EOF character + if (symbolIndexVec.size() == 0) + symbolIndexVec.push_back(new Symbol("$EOF$", false)); + //find what state num the from state is int stateNum = -1; for (std::vector::size_type i = 0; i < stateSets.size(); i++) { @@ -250,15 +254,11 @@ ParseAction* Parser::getTable(int state, Symbol* token) { } } - //This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec + //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec //(This assumes singular goal assignment, a simplification for now) - if (state == 1 && symbolIndex == -1) + if (state == 1 && symbolIndex == 0) return(new ParseAction(ParseAction::ACCEPT)); - //Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol - if (symbolIndex == -1) - symbolIndex = 0; - //If ourside the symbol range of this state (same as NULL), reject if ( symbolIndex >= table[state]->size() ) return(new ParseAction(ParseAction::REJECT)); @@ -272,10 +272,8 @@ ParseAction* Parser::getTable(int state, Symbol* token) { return (action); } -NodeTree* Parser::parseInput(std::string inputString) { - StringReader inputReader; - inputReader.setString(inputString); - Symbol* token = new Symbol("\""+inputReader.word()+"\"", true); +NodeTree* Parser::parseInput(Lexer* lexer) { + Symbol* token = lexer->next(); ParseAction* action; stateStack.push(0); @@ -284,6 +282,7 @@ NodeTree* Parser::parseInput(std::string inputString) { while (true) { std::cout << "In state: " << intToString(stateStack.top()) << std::endl; action = getTable(stateStack.top(), token); + std::cout << "Doing ParseAction: " << action->toString() << std::endl; switch (action->action) { case ParseAction::REDUCE: { @@ -312,7 +311,7 @@ NodeTree* Parser::parseInput(std::string inputString) { std::cout << "Shift " << token->toString() << std::endl; symbolStack.push(token); - token = new Symbol("\""+inputReader.word()+"\"", true); + token = lexer->next(); stateStack.push(action->shiftState); break; case ParseAction::ACCEPT: @@ -321,6 +320,7 @@ NodeTree* Parser::parseInput(std::string inputString) { break; case ParseAction::REJECT: std::cout << "REJECTED!" << std::endl; + std::cout << "REJECTED Symbol was " << token->toString() << std::endl; return(NULL); break; }