Added EOF symbol, making the getTable() saner for ACCEPT and lookahead support in ParseRule
This commit is contained in:
@@ -4,7 +4,7 @@ project(Kraken)
|
|||||||
|
|
||||||
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
|
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
|
||||||
|
|
||||||
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp )
|
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp )
|
||||||
|
|
||||||
include_directories( ${MY_INCLUDES} )
|
include_directories( ${MY_INCLUDES} )
|
||||||
|
|
||||||
|
|||||||
20
include/Lexer.h
Normal file
20
include/Lexer.h
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
#ifndef LEXER_H
|
||||||
|
#define LEXER_H
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
#include "StringReader.h"
|
||||||
|
#include "Symbol.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
class Lexer {
|
||||||
|
public:
|
||||||
|
Lexer();
|
||||||
|
Lexer(std::string inputString);
|
||||||
|
~Lexer();
|
||||||
|
void setInput(std::string inputString);
|
||||||
|
Symbol* next();
|
||||||
|
private:
|
||||||
|
StringReader reader;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
@@ -14,7 +14,7 @@
|
|||||||
class ParseRule {
|
class ParseRule {
|
||||||
public:
|
public:
|
||||||
ParseRule();
|
ParseRule();
|
||||||
ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide);
|
ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide, Symbol* lookahead = NULL);
|
||||||
~ParseRule();
|
~ParseRule();
|
||||||
|
|
||||||
bool const operator==(const ParseRule &other);
|
bool const operator==(const ParseRule &other);
|
||||||
@@ -41,6 +41,7 @@ class ParseRule {
|
|||||||
private:
|
private:
|
||||||
int pointerIndex;
|
int pointerIndex;
|
||||||
Symbol* leftHandle;
|
Symbol* leftHandle;
|
||||||
|
Symbol* lookahead;
|
||||||
std::vector<Symbol*> rightSide;
|
std::vector<Symbol*> rightSide;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,16 +1,13 @@
|
|||||||
#ifndef PARSER_H
|
#ifndef PARSER_H
|
||||||
#define PARSER_H
|
#define PARSER_H
|
||||||
|
|
||||||
#ifndef NULL
|
|
||||||
#define NULL 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "ParseRule.h"
|
#include "ParseRule.h"
|
||||||
#include "ParseAction.h"
|
#include "ParseAction.h"
|
||||||
#include "Symbol.h"
|
#include "Symbol.h"
|
||||||
#include "State.h"
|
#include "State.h"
|
||||||
#include "StringReader.h"
|
#include "StringReader.h"
|
||||||
|
#include "Lexer.h"
|
||||||
#include "NodeTree.h"
|
#include "NodeTree.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
@@ -32,7 +29,7 @@ class Parser {
|
|||||||
std::string stateSetToString();
|
std::string stateSetToString();
|
||||||
void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action);
|
void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action);
|
||||||
ParseAction* getTable(int state, Symbol* token);
|
ParseAction* getTable(int state, Symbol* token);
|
||||||
NodeTree* parseInput(std::string inputString);
|
NodeTree* parseInput(Lexer* lexer);
|
||||||
|
|
||||||
std::string grammerToString();
|
std::string grammerToString();
|
||||||
std::string grammerToDOT();
|
std::string grammerToDOT();
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
#ifndef UTIL_H
|
#ifndef UTIL_H
|
||||||
#define UTIL_H
|
#define UTIL_H
|
||||||
|
|
||||||
|
#ifndef NULL
|
||||||
|
#define NULL 0
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
|
|||||||
3
main.cpp
3
main.cpp
@@ -1,4 +1,5 @@
|
|||||||
#include "NodeTree.h"
|
#include "NodeTree.h"
|
||||||
|
#include "Lexer.h"
|
||||||
#include "Parser.h"
|
#include "Parser.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@@ -67,7 +68,7 @@ int main(int argc, char* argv[]) {
|
|||||||
//outFile << parser.grammerToDOT() << std::endl;
|
//outFile << parser.grammerToDOT() << std::endl;
|
||||||
|
|
||||||
std::cout << programInputFileString << std::endl;
|
std::cout << programInputFileString << std::endl;
|
||||||
NodeTree* parseTree = parser.parseInput(programInputFileString);
|
NodeTree* parseTree = parser.parseInput(new Lexer(programInputFileString));
|
||||||
|
|
||||||
if (parseTree) {
|
if (parseTree) {
|
||||||
std::cout << parseTree->DOTGraphString() << std::endl;
|
std::cout << parseTree->DOTGraphString() << std::endl;
|
||||||
|
|||||||
24
src/Lexer.cpp
Normal file
24
src/Lexer.cpp
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#include "Lexer.h"
|
||||||
|
|
||||||
|
//Default constructor: nothing to set up here; input can be supplied later through setInput().
Lexer::Lexer() {
}
|
||||||
|
|
||||||
|
//Constructs a lexer and passes inputString to the reader.
Lexer::Lexer(std::string inputString) {
	this->reader.setString(inputString);
}
|
||||||
|
|
||||||
|
//Destructor: there is nothing to clean up.
Lexer::~Lexer() {
}
|
||||||
|
|
||||||
|
void Lexer::setInput(std::string inputString) {
|
||||||
|
reader.setString(inputString);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Pulls the next word from the reader and wraps it in a freshly allocated Symbol.
//An empty word produces the "$EOF$" Symbol (second constructor argument false).
//Any other word produces a Symbol named with the word in double quotes (second argument true).
//NOTE(review): the second argument is presumably the terminal flag - confirm in Symbol.h.
Symbol* Lexer::next() {
	const std::string word = reader.word();
	if (word.empty()) {
		return new Symbol("$EOF$", false);
	}
	return new Symbol("\"" + word + "\"", true);
}
|
||||||
@@ -5,10 +5,11 @@ ParseRule::ParseRule() {
|
|||||||
leftHandle = NULL;
|
leftHandle = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide) {
|
ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide, Symbol* lookahead) {
|
||||||
this->leftHandle = leftHandle;
|
this->leftHandle = leftHandle;
|
||||||
this->pointerIndex = pointerIndex;
|
this->pointerIndex = pointerIndex;
|
||||||
this->rightSide = rightSide;
|
this->rightSide = rightSide;
|
||||||
|
this->lookahead = lookahead;
|
||||||
}
|
}
|
||||||
|
|
||||||
ParseRule::~ParseRule() {
|
ParseRule::~ParseRule() {
|
||||||
|
|||||||
@@ -162,6 +162,10 @@ std::string Parser::stateSetToString() {
|
|||||||
|
|
||||||
void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
|
void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
|
||||||
|
|
||||||
|
//If this is the first time we're adding to the table, add the EOF character
|
||||||
|
if (symbolIndexVec.size() == 0)
|
||||||
|
symbolIndexVec.push_back(new Symbol("$EOF$", false));
|
||||||
|
|
||||||
//find what state num the from state is
|
//find what state num the from state is
|
||||||
int stateNum = -1;
|
int stateNum = -1;
|
||||||
for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
|
for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
|
||||||
@@ -250,15 +254,11 @@ ParseAction* Parser::getTable(int state, Symbol* token) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec
|
//This is the accepting case: state 1's reduction on EOF, which is index 0 in the symbolIndexVec
|
||||||
//(This assumes singular goal assignment, a simplification for now)
|
//(This assumes singular goal assignment, a simplification for now)
|
||||||
if (state == 1 && symbolIndex == -1)
|
if (state == 1 && symbolIndex == 0)
|
||||||
return(new ParseAction(ParseAction::ACCEPT));
|
return(new ParseAction(ParseAction::ACCEPT));
|
||||||
|
|
||||||
//Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol
|
|
||||||
if (symbolIndex == -1)
|
|
||||||
symbolIndex = 0;
|
|
||||||
|
|
||||||
//If outside the symbol range of this state (same as NULL), reject
|
//If outside the symbol range of this state (same as NULL), reject
|
||||||
if ( symbolIndex >= table[state]->size() )
|
if ( symbolIndex >= table[state]->size() )
|
||||||
return(new ParseAction(ParseAction::REJECT));
|
return(new ParseAction(ParseAction::REJECT));
|
||||||
@@ -272,10 +272,8 @@ ParseAction* Parser::getTable(int state, Symbol* token) {
|
|||||||
return (action);
|
return (action);
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeTree* Parser::parseInput(std::string inputString) {
|
NodeTree* Parser::parseInput(Lexer* lexer) {
|
||||||
StringReader inputReader;
|
Symbol* token = lexer->next();
|
||||||
inputReader.setString(inputString);
|
|
||||||
Symbol* token = new Symbol("\""+inputReader.word()+"\"", true);
|
|
||||||
ParseAction* action;
|
ParseAction* action;
|
||||||
|
|
||||||
stateStack.push(0);
|
stateStack.push(0);
|
||||||
@@ -284,6 +282,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
|||||||
while (true) {
|
while (true) {
|
||||||
std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
|
std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
|
||||||
action = getTable(stateStack.top(), token);
|
action = getTable(stateStack.top(), token);
|
||||||
|
std::cout << "Doing ParseAction: " << action->toString() << std::endl;
|
||||||
switch (action->action) {
|
switch (action->action) {
|
||||||
case ParseAction::REDUCE:
|
case ParseAction::REDUCE:
|
||||||
{
|
{
|
||||||
@@ -312,7 +311,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
|||||||
std::cout << "Shift " << token->toString() << std::endl;
|
std::cout << "Shift " << token->toString() << std::endl;
|
||||||
|
|
||||||
symbolStack.push(token);
|
symbolStack.push(token);
|
||||||
token = new Symbol("\""+inputReader.word()+"\"", true);
|
token = lexer->next();
|
||||||
stateStack.push(action->shiftState);
|
stateStack.push(action->shiftState);
|
||||||
break;
|
break;
|
||||||
case ParseAction::ACCEPT:
|
case ParseAction::ACCEPT:
|
||||||
@@ -321,6 +320,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
|||||||
break;
|
break;
|
||||||
case ParseAction::REJECT:
|
case ParseAction::REJECT:
|
||||||
std::cout << "REJECTED!" << std::endl;
|
std::cout << "REJECTED!" << std::endl;
|
||||||
|
std::cout << "REJECTED Symbol was " << token->toString() << std::endl;
|
||||||
return(NULL);
|
return(NULL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user