Added EOF symbol, making the getTable() saner for ACCEPT and lookahead support in ParseRule

This commit is contained in:
Nathan Braswell
2013-06-13 14:25:10 -04:00
parent 949dbc532a
commit 0c651a5d9e
9 changed files with 68 additions and 20 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp ) set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp )
include_directories( ${MY_INCLUDES} ) include_directories( ${MY_INCLUDES} )

20
include/Lexer.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef LEXER_H
#define LEXER_H
#include "util.h"
#include "StringReader.h"
#include "Symbol.h"
#include <string>
//Lexer: hands out the contents of an input string as Symbol tokens,
//one token per call to next(). The input can be supplied at construction
//time or later through setInput().
class Lexer {
    //Holds the input string and is the object the input is forwarded to.
    StringReader reader;

public:
    //Construction / destruction
    Lexer();
    Lexer(std::string inputString);
    ~Lexer();

    //Replaces the string being read with inputString.
    void setInput(std::string inputString);

    //Returns a newly allocated Symbol for the next token.
    //NOTE(review): nothing visible here frees the returned Symbol, so
    //ownership presumably passes to the caller - confirm.
    Symbol* next();
};
#endif

View File

@@ -14,7 +14,7 @@
class ParseRule { class ParseRule {
public: public:
ParseRule(); ParseRule();
ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide); ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide, Symbol* lookahead = NULL);
~ParseRule(); ~ParseRule();
bool const operator==(const ParseRule &other); bool const operator==(const ParseRule &other);
@@ -41,6 +41,7 @@ class ParseRule {
private: private:
int pointerIndex; int pointerIndex;
Symbol* leftHandle; Symbol* leftHandle;
Symbol* lookahead;
std::vector<Symbol*> rightSide; std::vector<Symbol*> rightSide;
}; };

View File

@@ -1,16 +1,13 @@
#ifndef PARSER_H #ifndef PARSER_H
#define PARSER_H #define PARSER_H
#ifndef NULL
#define NULL 0
#endif
#include "util.h" #include "util.h"
#include "ParseRule.h" #include "ParseRule.h"
#include "ParseAction.h" #include "ParseAction.h"
#include "Symbol.h" #include "Symbol.h"
#include "State.h" #include "State.h"
#include "StringReader.h" #include "StringReader.h"
#include "Lexer.h"
#include "NodeTree.h" #include "NodeTree.h"
#include <map> #include <map>
@@ -32,7 +29,7 @@ class Parser {
std::string stateSetToString(); std::string stateSetToString();
void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action); void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action);
ParseAction* getTable(int state, Symbol* token); ParseAction* getTable(int state, Symbol* token);
NodeTree* parseInput(std::string inputString); NodeTree* parseInput(Lexer* lexer);
std::string grammerToString(); std::string grammerToString();
std::string grammerToDOT(); std::string grammerToDOT();

View File

@@ -1,6 +1,10 @@
#ifndef UTIL_H #ifndef UTIL_H
#define UTIL_H #define UTIL_H
#ifndef NULL
#define NULL 0
#endif
#include <string> #include <string>
#include <sstream> #include <sstream>

View File

@@ -1,4 +1,5 @@
#include "NodeTree.h" #include "NodeTree.h"
#include "Lexer.h"
#include "Parser.h" #include "Parser.h"
#include <string> #include <string>
#include <iostream> #include <iostream>
@@ -67,7 +68,7 @@ int main(int argc, char* argv[]) {
//outFile << parser.grammerToDOT() << std::endl; //outFile << parser.grammerToDOT() << std::endl;
std::cout << programInputFileString << std::endl; std::cout << programInputFileString << std::endl;
NodeTree* parseTree = parser.parseInput(programInputFileString); NodeTree* parseTree = parser.parseInput(new Lexer(programInputFileString));
if (parseTree) { if (parseTree) {
std::cout << parseTree->DOTGraphString() << std::endl; std::cout << parseTree->DOTGraphString() << std::endl;

24
src/Lexer.cpp Normal file
View File

@@ -0,0 +1,24 @@
#include "Lexer.h"
//Default constructor: no input string is handed to the reader here;
//callers supply one later through setInput().
Lexer::Lexer() {}
//Constructs a lexer that is immediately ready to tokenize inputString.
Lexer::Lexer(std::string inputString) {
    //Same effect as setting the reader's string directly; routed through
    //setInput so there is a single place that feeds the reader.
    setInput(inputString);
}
//Destructor: nothing is released explicitly in this body.
Lexer::~Lexer() {}
void Lexer::setInput(std::string inputString) {
reader.setString(inputString);
}
//Produces the next token from the reader as a newly allocated Symbol.
//A non-empty word becomes a Symbol whose name is the word wrapped in
//double quotes (second constructor argument true). An empty word yields
//the "$EOF$" Symbol (second constructor argument false).
Symbol* Lexer::next() {
    const std::string word = reader.word();

    //An empty word from the reader is what this lexer reports as EOF.
    if (word.empty()) {
        return new Symbol("$EOF$", false);
    }

    const std::string quotedWord = "\"" + word + "\"";
    return new Symbol(quotedWord, true);
}

View File

@@ -5,10 +5,11 @@ ParseRule::ParseRule() {
leftHandle = NULL; leftHandle = NULL;
} }
ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide) { ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide, Symbol* lookahead) {
this->leftHandle = leftHandle; this->leftHandle = leftHandle;
this->pointerIndex = pointerIndex; this->pointerIndex = pointerIndex;
this->rightSide = rightSide; this->rightSide = rightSide;
this->lookahead = lookahead;
} }
ParseRule::~ParseRule() { ParseRule::~ParseRule() {

View File

@@ -162,6 +162,10 @@ std::string Parser::stateSetToString() {
void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) { void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
//If this is the first time we're adding to the table, add the EOF character
if (symbolIndexVec.size() == 0)
symbolIndexVec.push_back(new Symbol("$EOF$", false));
//find what state num the from state is //find what state num the from state is
int stateNum = -1; int stateNum = -1;
for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) { for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
@@ -250,15 +254,11 @@ ParseAction* Parser::getTable(int state, Symbol* token) {
} }
} }
//This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec
//(This assumes singular goal assignment, a simplification for now) //(This assumes singular goal assignment, a simplification for now)
if (state == 1 && symbolIndex == -1) if (state == 1 && symbolIndex == 0)
return(new ParseAction(ParseAction::ACCEPT)); return(new ParseAction(ParseAction::ACCEPT));
//Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol
if (symbolIndex == -1)
symbolIndex = 0;
//If outside the symbol range of this state (same as NULL), reject //If outside the symbol range of this state (same as NULL), reject
if ( symbolIndex >= table[state]->size() ) if ( symbolIndex >= table[state]->size() )
return(new ParseAction(ParseAction::REJECT)); return(new ParseAction(ParseAction::REJECT));
@@ -272,10 +272,8 @@ ParseAction* Parser::getTable(int state, Symbol* token) {
return (action); return (action);
} }
NodeTree* Parser::parseInput(std::string inputString) { NodeTree* Parser::parseInput(Lexer* lexer) {
StringReader inputReader; Symbol* token = lexer->next();
inputReader.setString(inputString);
Symbol* token = new Symbol("\""+inputReader.word()+"\"", true);
ParseAction* action; ParseAction* action;
stateStack.push(0); stateStack.push(0);
@@ -284,6 +282,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
while (true) { while (true) {
std::cout << "In state: " << intToString(stateStack.top()) << std::endl; std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
action = getTable(stateStack.top(), token); action = getTable(stateStack.top(), token);
std::cout << "Doing ParseAction: " << action->toString() << std::endl;
switch (action->action) { switch (action->action) {
case ParseAction::REDUCE: case ParseAction::REDUCE:
{ {
@@ -312,7 +311,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
std::cout << "Shift " << token->toString() << std::endl; std::cout << "Shift " << token->toString() << std::endl;
symbolStack.push(token); symbolStack.push(token);
token = new Symbol("\""+inputReader.word()+"\"", true); token = lexer->next();
stateStack.push(action->shiftState); stateStack.push(action->shiftState);
break; break;
case ParseAction::ACCEPT: case ParseAction::ACCEPT:
@@ -321,6 +320,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
break; break;
case ParseAction::REJECT: case ParseAction::REJECT:
std::cout << "REJECTED!" << std::endl; std::cout << "REJECTED!" << std::endl;
std::cout << "REJECTED Symbol was " << token->toString() << std::endl;
return(NULL); return(NULL);
break; break;
} }