Added EOF symbol, making the getTable() saner for ACCEPT and lookahead support in ParseRule
This commit is contained in:
@@ -4,7 +4,7 @@ project(Kraken)
|
||||
|
||||
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
|
||||
|
||||
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp )
|
||||
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp )
|
||||
|
||||
include_directories( ${MY_INCLUDES} )
|
||||
|
||||
|
||||
20
include/Lexer.h
Normal file
20
include/Lexer.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include "util.h"
|
||||
#include "StringReader.h"
|
||||
#include "Symbol.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
//Lexer: wraps a StringReader and hands the input back one word at a time,
//each wrapped in a Symbol, for the parser to consume.
class Lexer {
|
||||
public:
|
||||
//Constructs a lexer without giving it any input text (the constructor body is empty).
Lexer();
|
||||
//Constructs a lexer and immediately hands inputString to the internal reader.
Lexer(std::string inputString);
|
||||
//Destructor; performs no cleanup of its own.
~Lexer();
|
||||
//Replaces the text being read by passing inputString to the internal reader.
void setInput(std::string inputString);
|
||||
//Returns a newly allocated Symbol for the next word from the reader: the word wrapped
//in double quotes (second constructor argument true), or "$EOF$" (second argument false)
//once the reader returns an empty word. The Lexer does not keep the pointer.
//NOTE(review): presumably the caller is responsible for freeing the Symbol, and the
//bool marks terminal vs. non-terminal - confirm against Symbol.h and the parser.
Symbol* next();
|
||||
private:
|
||||
//Source of words for next(); its text is set through the constructor or setInput().
StringReader reader;
|
||||
};
|
||||
#endif
|
||||
@@ -14,7 +14,7 @@
|
||||
class ParseRule {
|
||||
public:
|
||||
ParseRule();
|
||||
ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide);
|
||||
ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide, Symbol* lookahead = NULL);
|
||||
~ParseRule();
|
||||
|
||||
bool const operator==(const ParseRule &other);
|
||||
@@ -41,6 +41,7 @@ class ParseRule {
|
||||
private:
|
||||
int pointerIndex;
|
||||
Symbol* leftHandle;
|
||||
Symbol* lookahead;
|
||||
std::vector<Symbol*> rightSide;
|
||||
|
||||
};
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
#include "util.h"
|
||||
#include "ParseRule.h"
|
||||
#include "ParseAction.h"
|
||||
#include "Symbol.h"
|
||||
#include "State.h"
|
||||
#include "StringReader.h"
|
||||
#include "Lexer.h"
|
||||
#include "NodeTree.h"
|
||||
|
||||
#include <map>
|
||||
@@ -32,7 +29,7 @@ class Parser {
|
||||
std::string stateSetToString();
|
||||
void addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action);
|
||||
ParseAction* getTable(int state, Symbol* token);
|
||||
NodeTree* parseInput(std::string inputString);
|
||||
NodeTree* parseInput(Lexer* lexer);
|
||||
|
||||
std::string grammerToString();
|
||||
std::string grammerToDOT();
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
#ifndef UTIL_H
|
||||
#define UTIL_H
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
3
main.cpp
3
main.cpp
@@ -1,4 +1,5 @@
|
||||
#include "NodeTree.h"
|
||||
#include "Lexer.h"
|
||||
#include "Parser.h"
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
@@ -67,7 +68,7 @@ int main(int argc, char* argv[]) {
|
||||
//outFile << parser.grammerToDOT() << std::endl;
|
||||
|
||||
std::cout << programInputFileString << std::endl;
|
||||
NodeTree* parseTree = parser.parseInput(programInputFileString);
|
||||
NodeTree* parseTree = parser.parseInput(new Lexer(programInputFileString));
|
||||
|
||||
if (parseTree) {
|
||||
std::cout << parseTree->DOTGraphString() << std::endl;
|
||||
|
||||
24
src/Lexer.cpp
Normal file
24
src/Lexer.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#include "Lexer.h"
|
||||
|
||||
//Default constructor: no input text is supplied here.
Lexer::Lexer() {
    //Intentionally empty
}
|
||||
|
||||
//Constructs a lexer that starts out reading from inputString.
Lexer::Lexer(std::string inputString) {
    //Same single step as setInput(): hand the text to the reader.
    setInput(inputString);
}
|
||||
|
||||
//Destructor: the lexer has nothing of its own to release.
Lexer::~Lexer() {
    //Nothing to clean up
}
|
||||
|
||||
void Lexer::setInput(std::string inputString) {
|
||||
reader.setString(inputString);
|
||||
}
|
||||
|
||||
//Fetches the next word from the reader and wraps it in a newly allocated Symbol.
//An empty word from the reader is reported as the "$EOF$" symbol (flag false);
//any other word is wrapped in double quotes (flag true).
Symbol* Lexer::next() {
    const std::string word = reader.word();

    //Reader returned nothing: report end of input
    if (word.empty()) {
        return new Symbol("$EOF$", false);
    }

    return new Symbol("\"" + word + "\"", true);
}
|
||||
@@ -5,10 +5,11 @@ ParseRule::ParseRule() {
|
||||
leftHandle = NULL;
|
||||
}
|
||||
|
||||
ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide) {
|
||||
//Builds a rule from its left-hand symbol, the pointer index, a copy of the
//right-hand side symbols and the lookahead symbol.
//Members are initialised in their declaration order.
ParseRule::ParseRule(Symbol* leftHandle, int pointerIndex, std::vector<Symbol*> &rightSide, Symbol* lookahead)
    : pointerIndex(pointerIndex),
      leftHandle(leftHandle),
      lookahead(lookahead),
      rightSide(rightSide) {
}
|
||||
|
||||
ParseRule::~ParseRule() {
|
||||
|
||||
@@ -162,6 +162,10 @@ std::string Parser::stateSetToString() {
|
||||
|
||||
void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
|
||||
|
||||
//If this is the first time we're adding to the table, add the EOF character
|
||||
if (symbolIndexVec.size() == 0)
|
||||
symbolIndexVec.push_back(new Symbol("$EOF$", false));
|
||||
|
||||
//find what state num the from state is
|
||||
int stateNum = -1;
|
||||
for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
|
||||
@@ -250,15 +254,11 @@ ParseAction* Parser::getTable(int state, Symbol* token) {
|
||||
}
|
||||
}
|
||||
|
||||
//This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec
|
||||
//This is the accepting state, as it is state 1's reduction on EOF, which is index 0 in the symbolIndexVec
|
||||
//(This assumes singular goal assignment, a simplification for now)
|
||||
if (state == 1 && symbolIndex == -1)
|
||||
if (state == 1 && symbolIndex == 0)
|
||||
return(new ParseAction(ParseAction::ACCEPT));
|
||||
|
||||
//Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol
|
||||
if (symbolIndex == -1)
|
||||
symbolIndex = 0;
|
||||
|
||||
//If outside the symbol range of this state (same as NULL), reject
|
||||
if ( symbolIndex >= table[state]->size() )
|
||||
return(new ParseAction(ParseAction::REJECT));
|
||||
@@ -272,10 +272,8 @@ ParseAction* Parser::getTable(int state, Symbol* token) {
|
||||
return (action);
|
||||
}
|
||||
|
||||
NodeTree* Parser::parseInput(std::string inputString) {
|
||||
StringReader inputReader;
|
||||
inputReader.setString(inputString);
|
||||
Symbol* token = new Symbol("\""+inputReader.word()+"\"", true);
|
||||
NodeTree* Parser::parseInput(Lexer* lexer) {
|
||||
Symbol* token = lexer->next();
|
||||
ParseAction* action;
|
||||
|
||||
stateStack.push(0);
|
||||
@@ -284,6 +282,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
||||
while (true) {
|
||||
std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
|
||||
action = getTable(stateStack.top(), token);
|
||||
std::cout << "Doing ParseAction: " << action->toString() << std::endl;
|
||||
switch (action->action) {
|
||||
case ParseAction::REDUCE:
|
||||
{
|
||||
@@ -312,7 +311,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
||||
std::cout << "Shift " << token->toString() << std::endl;
|
||||
|
||||
symbolStack.push(token);
|
||||
token = new Symbol("\""+inputReader.word()+"\"", true);
|
||||
token = lexer->next();
|
||||
stateStack.push(action->shiftState);
|
||||
break;
|
||||
case ParseAction::ACCEPT:
|
||||
@@ -321,6 +320,7 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
||||
break;
|
||||
case ParseAction::REJECT:
|
||||
std::cout << "REJECTED!" << std::endl;
|
||||
std::cout << "REJECTED Symbol was " << token->toString() << std::endl;
|
||||
return(NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user