More work towards RNGLR. First, NodeTree is now a template. Second, I've started writing the actual GLR parser and GSS and other things, but am still in the first write process.

This commit is contained in:
Nathan Braswell
2013-07-28 19:45:08 -04:00
parent 726ead0455
commit 6d7b38a03b
14 changed files with 360 additions and 146 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp )
set( MY_SOURCES main.cpp src/Parser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp )
include_directories( ${MY_INCLUDES} )

View File

@@ -0,0 +1,23 @@
#include <iostream>
#include <vector>
#include "GSSNode.h"
#ifndef GRAPH_STRUCTURED_STACK
#define GRAPH_STRUCTURED_STACK
//Graph structured stack (GSS) used by the RNGLR parser: GSSNodes grouped into numbered frontiers and linked by edges.
//The member functions are still unimplemented stubs, so the per-method notes below describe the intended role as
//suggested by the names and by the calls made from RNGLRParser - TODO confirm once they are written.
//NOTE(review): the #include lines of this header sit above the include guard; that is harmless only while the
//included headers are guarded themselves.
class GraphStructuredStack {
	public:
		GraphStructuredStack();
		~GraphStructuredStack();
		//Presumably creates a node for parser state stateNum (ownership of the result is not decided yet).
		GSSNode* newNode(int stateNum);
		//Presumably records node as a member of the given frontier.
		void addToFrontier(int frontier, GSSNode* node);
		//NOTE(review): RNGLRParser stores this result in a GSSNode* and then uses it as a node, which a bool
		//return cannot support - confirm the intended return type before implementing.
		bool inFrontier(int frontier, int state);
		bool frontierIsEmpty(int frontier);
		bool frontierHasAccState(int frontier);
		//Presumably the nodes reachable from start along paths of exactly `length` edges.
		std::vector<GSSNode*>* getReachable(GSSNode* start, int length);
		bool hasEdge(GSSNode* start, GSSNode* end);
		void addEdge(GSSNode* start, GSSNode* end);
	private:
		//presumably gss[i] holds the nodes of frontier i - TODO confirm
		std::vector<std::vector<GSSNode*>*> gss;
};
#endif

View File

@@ -6,19 +6,20 @@
#endif
#include <util.h>
#include <Symbol.h>
//#include <Symbol.h>
#include <vector>
#include <string>
#include <iostream>
//Circular references
class Symbol;
//class Symbol;
template<class T>
class NodeTree {
public:
NodeTree();
NodeTree(std::string name, Symbol* inSymbol);
NodeTree(std::string name, T inData);
~NodeTree();
void setParent(NodeTree* parent);
@@ -34,8 +35,8 @@ class NodeTree {
std::string getName();
void setName(std::string);
Symbol* getSymbol();
void setSymbol(Symbol* symbol);
T getData();
void setData(T data);
int size();
std::string DOTGraphString();
@@ -44,7 +45,7 @@ class NodeTree {
std::string DOTGraphStringHelper();
std::string getDOTName();
std::string name;
Symbol* symbol;
T data;
NodeTree* parent;
std::vector<NodeTree*> children;
@@ -52,4 +53,129 @@ class NodeTree {
int id;
};
//Storage for the id counter of NodeTree<T>; every distinct T gets its own counter.
//It has static storage duration, so it starts at zero; the constructors hand out ids by post-incrementing it.
template<class T>
int NodeTree<T>::idCounter;
//Builds a detached placeholder node: no parent, the name "UnnamedNode", a value-initialized payload and a fresh id.
//The payload is initialized with T() instead of being assigned NULL, so the constructor no longer assumes
//that T is a pointer type; for a pointer T the result is still the null pointer.
template<class T>
NodeTree<T>::NodeTree() : name("UnnamedNode"), data(), parent(NULL), id(idCounter++) {
}
//Builds a detached node (no parent) carrying the given name and payload, and assigns it a fresh id.
//The payload is copy-initialized straight from inData; the earlier "assign NULL, then overwrite" sequence was a
//dead store that also required T to accept NULL.
template<class T>
NodeTree<T>::NodeTree(std::string name, T inData) : name(name), data(inData), parent(NULL), id(idCounter++) {
}
//Destructor: empties the child list. Only the stored pointers are dropped - the child nodes themselves are
//not deleted here.
//NOTE(review): who owns (and frees) the children is not visible from this file - confirm.
template<class T>
NodeTree<T>::~NodeTree() {
children.clear();
}
//Re-points this node at a new parent. If the node already had a parent, that parent is first asked to
//remove this node from its children. The new parent's child list is not touched here.
template<class T>
void NodeTree<T>::setParent(NodeTree<T>* parent) {
	NodeTree<T>* const previous = this->parent;
	if (previous != NULL)
		previous->removeChild(this);
	this->parent = parent;
}
//Returns the stored parent pointer (NULL for a node that was never given a parent).
template<class T>
NodeTree<T>* NodeTree<T>::getParent() {
	return this->parent;
}
//Appends child to the child list unless that exact pointer is already present.
//The child's own parent pointer is left unchanged.
template<class T>
void NodeTree<T>::addChild(NodeTree<T>* child) {
	const bool alreadyPresent = (findChild(child) != -1);
	if (alreadyPresent)
		return;
	children.push_back(child);
}
//Linear search for child by pointer identity.
//Returns the position of the first match in the child list, or -1 when the pointer is not a child.
template<class T>
int NodeTree<T>::findChild(NodeTree<T>* child) {
	typedef typename std::vector<NodeTree<T>*>::size_type Position;
	int found = -1;
	for (Position pos = 0; pos < children.size() && found == -1; ++pos) {
		if (children[pos] == child)
			found = static_cast<int>(pos);
	}
	return found;
}
//Removes the child stored at position index; later children move down by one. The child node is not deleted.
//An index outside [0, number of children) is ignored instead of indexing/erasing out of bounds.
template<class T>
void NodeTree<T>::removeChild(int index) {
	if (index < 0 || index >= static_cast<int>(children.size()))
		return;
	//No need to null the slot first: erase() discards it.
	children.erase(children.begin() + index);
}
//Removes child from the child list if it is present; does nothing otherwise. The child node is not deleted.
template<class T>
void NodeTree<T>::removeChild(NodeTree<T>* child) {
	int index = findChild(child);
	//findChild reports "not found" as -1. Index 0 is a valid position, so it must be removable too.
	if (index != -1) {
		removeChild(index);
	}
}
//Number of nodes in the subtree rooted here: this node plus, recursively, the size of every child.
template<class T>
int NodeTree<T>::size() {
	typedef typename std::vector<NodeTree<T>*>::size_type Position;
	int total = 1; //this node
	for (Position pos = 0; pos < children.size(); ++pos)
		total += children[pos]->size();
	return total;
}
//Returns the child stored at position index. The index is not range-checked.
template<class T>
NodeTree<T>* NodeTree<T>::get(int index) {
	NodeTree<T>* const child = children[index];
	return child;
}
//Returns a copy of this node's name.
template<class T>
std::string NodeTree<T>::getName() {
	return this->name;
}
//Replaces this node's name with the given string.
template<class T>
void NodeTree<T>::setName(std::string name) {
	//The parameter hides the member of the same name, hence the explicit this->.
	this->name.assign(name);
}
//Returns a copy of the payload stored in this node.
template<class T>
T NodeTree<T>::getData() {
	return this->data;
}
//Overwrites the payload stored in this node.
template<class T>
void NodeTree<T>::setData(T data) {
	//Qualified name: the parameter hides the member of the same name.
	NodeTree<T>::data = data;
}
//Renders the subtree rooted at this node as a Graphviz DOT digraph named "Kraken".
template<class T>
std::string NodeTree<T>::DOTGraphString() {
	std::string graph = "digraph Kraken { \n";
	graph += DOTGraphStringHelper();
	graph += "}";
	return graph;
}
//Emits one "parent -> child;" line per child, each immediately followed by that child's own edges (pre-order).
//A node without children contributes the empty string.
template<class T>
std::string NodeTree<T>::DOTGraphStringHelper() {
	typedef typename std::vector<NodeTree<T>*>::size_type Position;
	std::string edges;
	for (Position pos = 0; pos < children.size(); ++pos) {
		NodeTree<T>* const kid = children[pos];
		edges += getDOTName() + " -> " + kid->getDOTName() + ";\n" + kid->DOTGraphStringHelper();
	}
	return edges;
}
//Builds the quoted DOT identifier of this node: "name-<payload>_<id>" when a payload is set, "name_<id>" otherwise.
//The payload is compared against NULL and dereferenced, so T has to be a pointer-like type with a toString() member.
template<class T>
std::string NodeTree<T>::getDOTName() {
	std::string label = "\"" + name;
	if (data != NULL) {
		//Note that terminals already have a quote in the front of their name, so we don't need to add one
		label += "-";
		label += data->toString();
	}
	label += "_";
	label += intToString(id);
	label += "\"";
	return label;
}
#endif

View File

@@ -33,7 +33,7 @@ class Parser {
int stateNum(State* state);
std::string stateSetToString();
NodeTree* parseInput(std::string inputString);
NodeTree<Symbol*>* parseInput(std::string inputString);
std::string grammerToString();
std::string grammerToDOT();
@@ -60,7 +60,7 @@ class Parser {
std::stack<Symbol*> symbolStack;
Symbol* getOrAddSymbol(std::string symbolString, bool isTerminal);
NodeTree* reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols);
NodeTree<Symbol*>* reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols);
};
#endif

17
include/RNGLRParser.h Normal file
View File

@@ -0,0 +1,17 @@
#include <iostream>
//RNGLR parser driver (first draft): lexes the input, then processes a graph structured stack frontier by
//frontier using two work queues, one for pending reductions and one for pending shifts.
//NOTE(review): this header only includes <iostream>. It also needs <string>, <vector>, <queue>, <utility> and the
//project headers that declare Lexer, Symbol, GSSNode and GraphStructuredStack, plus an include guard.
//NOTE(review): RNGLRParser.cpp also refers to `table`, `EOFSymbol` and `EOFToken`, none of which are declared here.
class RNGLRParser {
	public:
		//The definitions finish with a bare `return;`, so the member functions are declared void.
		void parseInput(std::string inputString);
		void reducer(int i);
		void shifter(int i);
	private:
		Lexer lexer;
		std::vector<Symbol*> input;
		GraphStructuredStack gss;
		//start node, lefthand side of the reduction, reduction length
		std::queue< std::pair< std::pair<GSSNode*, Symbol*>, int > > toReduce;
		//Node coming from, state going to
		std::queue< std::pair<GSSNode*, int> > toShift;
};

View File

@@ -11,26 +11,26 @@
#include <string>
//Circular references
class NodeTree;
//class NodeTree;
class Symbol {
public:
Symbol(std::string name, bool isTerminal);
Symbol(std::string name, bool isTerminal, std::string value);
Symbol(std::string name, bool isTerminal, NodeTree* tree);
Symbol(std::string name, bool isTerminal, NodeTree<Symbol*>* tree);
~Symbol();
bool const operator==(const Symbol &other);
std::string getName();
std::string toString();
Symbol* clone();
void setSubTree(NodeTree* tree);
NodeTree* getSubTree();
void setSubTree(NodeTree<Symbol*>* tree);
NodeTree<Symbol*>* getSubTree();
bool isTerminal();
private:
std::string name;
std::string value;
bool terminal;
NodeTree* subTree;
NodeTree<Symbol*>* subTree;
};
#endif

View File

@@ -1,4 +1,5 @@
#include "NodeTree.h"
#include "Symbol.h"
#include "Lexer.h"
#include "Parser.h"
#include <string>
@@ -64,7 +65,7 @@ int main(int argc, char* argv[]) {
std::cout << "\n\n\n\n\n\n\n\n\n\nParsing" << std::endl;
std::cout << programInputFileString << std::endl;
NodeTree* parseTree = parser.parseInput(programInputFileString);
NodeTree<Symbol*>* parseTree = parser.parseInput(programInputFileString);
if (parseTree) {
std::cout << parseTree->DOTGraphString() << std::endl;

View File

@@ -0,0 +1,41 @@
#include "GraphStructuredStack.h"
//Constructor. Nothing to set up yet: the body is an empty placeholder.
GraphStructuredStack::GraphStructuredStack() {
//TODO: not implemented yet
}
//Destructor. Currently releases nothing: the body is an empty placeholder.
//NOTE(review): `gss` holds raw pointers to vectors; decide who frees them once the class is implemented.
GraphStructuredStack::~GraphStructuredStack() {
//TODO: not implemented yet
}
//Intended to create a GSS node for parser state stateNum.
//TODO: not implemented yet. Until it is, a null pointer is returned so that the function no longer flows off
//the end of a non-void body (undefined behaviour); callers must not dereference the result.
GSSNode* GraphStructuredStack::newNode(int stateNum) {
	return 0;
}
//Intended to record node as a member of the given frontier. Currently a no-op placeholder.
void GraphStructuredStack::addToFrontier(int frontier, GSSNode* node) {
//TODO: not implemented yet
}
//Intended to tell whether the given frontier already contains a node for `state`.
//TODO: not implemented yet. Nothing is ever stored, so the answer is always false; the explicit return also
//removes the undefined behaviour of flowing off the end of a non-void function.
//NOTE(review): RNGLRParser assigns this result to a GSSNode* - confirm the intended return type.
bool GraphStructuredStack::inFrontier(int frontier, int state) {
	return false;
}
//Intended to tell whether the given frontier holds no nodes.
//TODO: not implemented yet. addToFrontier stores nothing so far, so every frontier is reported as empty; the
//explicit return also removes the undefined behaviour of flowing off the end of a non-void function.
bool GraphStructuredStack::frontierIsEmpty(int frontier) {
	return true;
}
//Intended to tell whether the given frontier contains a node in an accepting state.
//TODO: not implemented yet. Always answers false for now; the explicit return removes the undefined behaviour
//of flowing off the end of a non-void function.
bool GraphStructuredStack::frontierHasAccState(int frontier) {
	return false;
}
//Intended to collect the nodes reachable from start along paths of `lenght` edges.
//TODO: not implemented yet. A new, empty vector is returned so that callers which immediately iterate over the
//result see zero nodes instead of reading an indeterminate pointer (the body used to flow off the end of a
//non-void function, which is undefined behaviour).
//NOTE(review): the result is heap allocated; who deletes it still has to be decided.
std::vector<GSSNode*>* GraphStructuredStack::getReachable(GSSNode* start, int lenght) {
	return new std::vector<GSSNode*>();
}
//Intended to tell whether an edge from start to end already exists.
//TODO: not implemented yet. addEdge stores nothing so far, so the answer is always false; the explicit return
//removes the undefined behaviour of flowing off the end of a non-void function.
bool GraphStructuredStack::hasEdge(GSSNode* start, GSSNode* end) {
	return false;
}
//Intended to add an edge from start to end. Currently a no-op placeholder.
void GraphStructuredStack::addEdge(GSSNode* start, GSSNode* end) {
//TODO: not implemented yet
}

View File

@@ -1,106 +0,0 @@
#include "NodeTree.h"
//Storage for the class-wide id counter. It has static storage duration, so it starts at zero; the constructors
//hand out ids by post-incrementing it.
int NodeTree::idCounter;
//Builds a detached placeholder node: no parent, the name "UnnamedNode", no symbol, and a fresh id.
NodeTree::NodeTree() {
	id = idCounter++;
	symbol = NULL;
	parent = NULL;
	name = "UnnamedNode";
}
//Builds a detached node (no parent) with the given name and symbol, and assigns it a fresh id.
NodeTree::NodeTree(std::string name, Symbol* inSymbol) {
	this->parent = NULL;
	this->symbol = inSymbol;
	this->name = name;
	this->id = idCounter++;
}
//Destructor: empties the child list. Only the stored pointers are dropped - the child nodes themselves are
//not deleted here.
//NOTE(review): who owns (and frees) the children is not visible from this file - confirm.
NodeTree::~NodeTree() {
children.clear();
}
//Re-points this node at a new parent. If the node already had a parent, that parent is first asked to
//remove this node from its children. The new parent's child list is not touched here.
void NodeTree::setParent(NodeTree* parent) {
	NodeTree* const previous = this->parent;
	if (previous != NULL)
		previous->removeChild(this);
	this->parent = parent;
}
//Returns the stored parent pointer (NULL for a node that was never given a parent).
NodeTree* NodeTree::getParent() {
	return this->parent;
}
//Appends child to the child list unless that exact pointer is already present.
//The child's own parent pointer is left unchanged.
void NodeTree::addChild(NodeTree* child) {
	const bool alreadyPresent = (findChild(child) != -1);
	if (alreadyPresent)
		return;
	children.push_back(child);
}
//Linear search for child by pointer identity.
//Returns the position of the first match in the child list, or -1 when the pointer is not a child.
int NodeTree::findChild(NodeTree* child) {
	int found = -1;
	for (std::vector<NodeTree*>::size_type pos = 0; pos < children.size() && found == -1; ++pos) {
		if (children[pos] == child)
			found = static_cast<int>(pos);
	}
	return found;
}
//Removes the child stored at position index; later children move down by one. The child node is not deleted.
//An index outside [0, number of children) is ignored instead of indexing/erasing out of bounds.
void NodeTree::removeChild(int index) {
	if (index < 0 || index >= static_cast<int>(children.size()))
		return;
	//No need to null the slot first: erase() discards it.
	children.erase(children.begin() + index);
}
//Removes child from the child list if it is present; does nothing otherwise. The child node is not deleted.
void NodeTree::removeChild(NodeTree* child) {
	int index = findChild(child);
	//findChild reports "not found" as -1. Index 0 is a valid position, so it must be removable too.
	if (index != -1) {
		removeChild(index);
	}
}
int NodeTree::size() {
int count = 0;
for (int i = 0; i < children.size(); i++) {
count += children[i]->size();
}
return 1+count;
}
//Returns the child stored at position index. The index is not range-checked.
NodeTree* NodeTree::get(int index) {
	NodeTree* const child = children[index];
	return child;
}
std::string NodeTree::getName() {
return name;
}
//Replaces this node's name with the given string.
void NodeTree::setName(std::string name) {
	//The parameter hides the member of the same name, hence the explicit this->.
	this->name.assign(name);
}
//Returns the symbol pointer stored in this node (NULL when none was set).
Symbol* NodeTree::getSymbol() {
	return this->symbol;
}
//Overwrites the symbol pointer stored in this node; the previous symbol is not deleted.
void NodeTree::setSymbol(Symbol* symbol) {
	//Qualified name: the parameter hides the member of the same name.
	NodeTree::symbol = symbol;
}
//Renders the subtree rooted at this node as a Graphviz DOT digraph named "Kraken".
std::string NodeTree::DOTGraphString() {
	std::string graph = "digraph Kraken { \n";
	graph += DOTGraphStringHelper();
	graph += "}";
	return graph;
}
std::string NodeTree::DOTGraphStringHelper() {
std::string ourDOTRelation = "";
for (int i = 0; i < children.size(); i++) {
ourDOTRelation += getDOTName() + " -> " + children[i]->getDOTName() + ";\n" + children[i]->DOTGraphStringHelper();
}
return(ourDOTRelation);
}
//Builds the quoted DOT identifier of this node: "name-<symbol>_<id>" when a symbol is set, "name_<id>" otherwise.
std::string NodeTree::getDOTName() {
	std::string label = "\"" + name;
	if (symbol != NULL) {
		//Note that terminals already have a quote in the front of their name, so we don't need to add one
		label += "-";
		label += symbol->toString();
	}
	label += "_";
	label += intToString(id);
	label += "\"";
	return label;
}

View File

@@ -74,7 +74,6 @@ void Parser::loadGrammer(std::string grammerInputString) {
}
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
//std::cout << "Simple first set for " << token->toString() << std::endl;
std::vector<Symbol*> avoidList;
return firstSet(token, avoidList);
}
@@ -84,11 +83,8 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token, std::vector<Symbol*> &avoi
for (std::vector<Symbol*>::size_type i = 0; i < avoidList.size(); i++)
if (*(avoidList[i]) == *token) {
return new std::vector<Symbol*>();
//std::cout << "Avoiding firstSet for " << token->toString() << std::endl;
}
avoidList.push_back(token);
//std::cout << "Cpx first set for " << token->toString() << std::endl;
//std::cout << "Doing first set for " << token->toString() << std::endl;
std::vector<Symbol*>* first = new std::vector<Symbol*>();
//First, if the symbol is a terminal, than it's first set is just itself.
if (token->isTerminal()) {
@@ -314,7 +310,7 @@ std::string Parser::tableToString() {
return table.toString();
}
NodeTree* Parser::parseInput(std::string inputString) {
NodeTree<Symbol*>* Parser::parseInput(std::string inputString) {
lexer.setInput(inputString);
Symbol* token = lexer.next();
ParseAction* action;
@@ -370,11 +366,11 @@ NodeTree* Parser::parseInput(std::string inputString) {
}
}
NodeTree* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols) {
NodeTree* newTree = new NodeTree(newSymbol->getName(), newSymbol);
NodeTree<Symbol*>* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols) {
NodeTree<Symbol*>* newTree = new NodeTree<Symbol*>(newSymbol->getName(), newSymbol);
for (std::vector<Symbol*>::size_type i = 0; i < symbols.size(); i++) {
if (symbols[i]->isTerminal())
newTree->addChild(new NodeTree(symbols[i]->getName(), symbols[i]));
newTree->addChild(new NodeTree<Symbol*>(symbols[i]->getName(), symbols[i]));
else
newTree->addChild(symbols[i]->getSubTree());
}

116
src/RNGLRParser.cpp Normal file
View File

@@ -0,0 +1,116 @@
RNGLRParser::parseInput(std::string inputString) {
//Check for no tokens
if (inputString == "") {
if (table.get(0,EOFSymbol)->action == ParseAction::REDUCE)
std::cout << "Accepted!" << std::endl;
else
std::cout << "Rejected, no input (with no accepting state)" << std::endl;
return;
}
lexer.setInput(inputString);
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
//It could be converted to on-line later.
Symbol* currentToken = lexer.next();
input.push_back(currentToken);
while (*currentToken != *EOFToken) {
currentToken = lexer.next();
input.push_back(currentToken);
}
//Frontier 0, new node with state 0
GSSNode* v0 = gss.newNode(0);
gss.addToFrontier(0,v0);
std::vector<ParseAction*> firstActions = table.get(0, input[0]);
for (std::vector<ParseAction*>::size_type i = 0; i < firstActions.size(); i++) {
if (firstActions[i]->action == ParseAction::SHIFT)
toShift.push_back(std::make_pair(v0,firstActions[i]->toState()));
else if (firstActions[i]->action == ParseAction::REDUCE && firstActions[i]->reduceRule->getRightSide()->size() == 0) {
toReduce.push_back(std::make_pair(std::make_pair(v0, firstActions[i]->reduceRule->getLeftSide()), 0));
}
}
for (int i = 0; i < input.size(); i++) {
if (gss.frontierIsEmpty(i))
break;
while (toReduce.size() != 0)
reducer(i);
shifter(i);
}
if (gss.frontierHasAccSt(input.size()-1))
std::cout << "Accepted!" << std::endl;
else
std::cout << "Rejected!" << std::endl;
return;
}
RNGLRParser::reducer(int i) {
std::pair< std::pair<GSSNode*, Symbol*>, int > reduction = toReduce.front();
int pathLength = reduction.second > 0 : reduction.second -1 ? 0;
std::vector<GSSNode*>* reachable = gss.getReachable(reduction.first.first, pathLength);
for (std::vector<GSSNode*>::size_type j = 0; j < reachable->size(); j++) {
GSSNode* currentReached = (*reachable)[j];
int toState = table.getShift(currentReached->state(), reduction.first.second);
GSSNode* toStateNode = gss.inFrontier(i, toState);
if (toStateNode) {
if (!gss.hasEdge(toStateNode, currentReached)) {
gss.addEdge(toStateNode, currentReached);
if (reduction.second != 0) {
//Do all non null reductions
}
}
} else {
toStateNode = gss.newNode(toState);
gss.addToFrontier(i, toStateNode);
gss.addEdge(toStateNode, currentReached);
std::vector<ParseAction*> actions = table.get(toState, input[i+1]);
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
//Shift
if (actions[k]->action == ParseAction::SHIFT)
nextShifts.push_back(std::make_pair(toStateNode, actions[k]->shiftState));
else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule()->size() != 0)
toReduce.push_back(std::make_pair(std::make_pair(currentReached, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->size()));
else (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule()->size() == 0)
toReduce.push_back(std::make_pair(std::make_pair(toStateNode, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->size()));
}
}
}
}
RNGLRParser::shifter(int i) {
if (i != input.length()-1) {
std::queue<ParseAction*> nextShifts;
while (!toShift.empty()) {
std::pair<GSSNode*, int> shift = toShift.front();
GSSNode* shiftTo = gss.inFrontier(i+1, shift.second);
if (shiftTo) {
gss.addEdge(shiftTo, shift.first);
std::vector<ParseAction*> actions = table.get(shift.second, input[i+2]);
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule->size() != 0)
toReduce.push_back(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size()));
}
} else {
shiftTo = gss.newNode(shift.second);
gss.addToFrontier(i+1, shiftTo);
gss.addEdge(shiftTo, shift.first);
std::vector<ParseAction*> actions = table.get(shift.toState(), input[i+2]);
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
//Shift
if (actions[j]->action == ParseAction::SHIFT)
nextShifts.push_back(std::make_pair(shiftTo, actions[j]->shiftState));
else if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule()->size() != 0)
toReduce.push_back(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size()));
else (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule()->size() == 0)
toReduce.push_back(std::make_pair(std::make_pair(shiftTo, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size()));
}
}
}
toShift = nextShifts;
}
}

View File

@@ -17,7 +17,7 @@ void RegEx::construct() {
switch (pattern[i]) {
case '*':
{
std::cout << "Star at " << i << " in " << pattern << std::endl;
//std::cout << "Star at " << i << " in " << pattern << std::endl;
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
// currentStates[j]->addNext(currentStates[k]);
@@ -28,7 +28,7 @@ void RegEx::construct() {
break;
case '+':
{
std::cout << "Plus at " << i << " in " << pattern << std::endl;
//std::cout << "Plus at " << i << " in " << pattern << std::endl;
//OtherThingy
//current->addNext(current);
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
@@ -39,14 +39,14 @@ void RegEx::construct() {
break;
case '?':
{
std::cout << "Question at " << i << " in " << pattern << std::endl;
//std::cout << "Question at " << i << " in " << pattern << std::endl;
//add all previous states to current states to enable skipping over the questioned item
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
}
break;
case '|':
{
std::cout << "Alternation at " << i << " in " << pattern << std::endl;
//std::cout << "Alternation at " << i << " in " << pattern << std::endl;
//alternation
alternating = true;
}
@@ -54,7 +54,7 @@ void RegEx::construct() {
break;
case '(':
{
std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
//std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
//perentheses
//Create a peren node with an inner empty node
RegExState* next = new RegExState(new RegExState());
@@ -88,13 +88,13 @@ void RegEx::construct() {
currentStates.clear();
currentStates.push_back(next->getInner());
}
std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
//std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
}
break;
case ')':
{
std::cout << "End peren at " << i << " in " << pattern << std::endl;
//std::cout << "End peren at " << i << " in " << pattern << std::endl;
//perentheses
//Pop off the states that will now be the previous states and the peren node which will now be the current node
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
@@ -113,19 +113,19 @@ void RegEx::construct() {
case '\\':
{
i++;
std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
//std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
//Ahh, it's escaping a special character, so fall through to the default.
}
default:
{
std::cout << "Regular" << std::endl;
//std::cout << "Regular" << std::endl;
//Ahh, it's regular
RegExState* next = new RegExState(pattern[i]);
//If we're alternating, add next as the next for each previous state, and add self to currentStates
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
previousStates[j]->addNext(next);
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
}
currentStates.push_back(next);
alternating = false;
@@ -134,7 +134,7 @@ void RegEx::construct() {
//previous states, and add ourself as the new current state.
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
currentStates[j]->addNext(next);
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
}
previousStates.clear();
previousStates = currentStates;
@@ -150,7 +150,7 @@ void RegEx::construct() {
}
void RegEx::deperenthesize() {
std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
//std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
//Now go through and expand the peren nodes to regular nodes
std::vector<RegExState*> processedStates;
@@ -224,7 +224,7 @@ void RegEx::deperenthesize() {
}
}
}
std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
//std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
}
RegEx::~RegEx() {

View File

@@ -14,7 +14,7 @@ Symbol::Symbol(std::string name, bool isTerminal, std::string value) {
this->value = value;
}
Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) {
Symbol::Symbol(std::string name, bool isTerminal, NodeTree<Symbol*>* tree) {
this->name = name;
this->terminal = isTerminal;
this->subTree = tree;
@@ -40,11 +40,11 @@ Symbol* Symbol::clone() {
return new Symbol(name, terminal, subTree);
}
void Symbol::setSubTree(NodeTree* tree) {
void Symbol::setSubTree(NodeTree<Symbol*>* tree) {
subTree = tree;
}
NodeTree* Symbol::getSubTree() {
NodeTree<Symbol*>* Symbol::getSubTree() {
return subTree;
}

View File

@@ -112,4 +112,4 @@ std::string Table::toString() {
concat += "\n";
}
return(concat);
}
}