More work towards RNGLR. First, NodeTree is now a template. Second, I've started writing the actual GLR parser and GSS and other things, but am still in the first write process.
This commit is contained in:
41
src/GraphStructuredStack.cpp
Normal file
41
src/GraphStructuredStack.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "GraphStructuredStack.h"
|
||||
|
||||
// Default constructor. Placeholder: performs no initialization yet.
GraphStructuredStack::GraphStructuredStack() {
    // TODO: not yet implemented
}
|
||||
|
||||
// Destructor. Placeholder: releases nothing yet.
GraphStructuredStack::~GraphStructuredStack() {
    // TODO: not yet implemented
}
|
||||
|
||||
// Intended to create a new GSS node for parser state stateNum.
// Placeholder: no node is created yet, so a null pointer is returned
// explicitly. The original fell off the end of a non-void function, which
// is undefined behavior.
GSSNode* GraphStructuredStack::newNode(int stateNum) {
    // TODO: not yet implemented
    (void)stateNum;
    return 0; // null pointer until nodes are actually allocated
}
|
||||
|
||||
// Intended to record node as a member of the given frontier.
// Placeholder: does nothing yet.
void GraphStructuredStack::addToFrontier(int frontier, GSSNode* node) {
    // TODO: not yet implemented
}
|
||||
|
||||
// Intended to report whether the given frontier holds a node for state.
// Placeholder: nothing is stored yet, so the answer is always false. The
// explicit return replaces falling off the end of a non-void function
// (undefined behavior).
// NOTE(review): RNGLRParser::reducer and RNGLRParser::shifter assign this
// result to a GSSNode* and then use it as a node, so the return type here
// probably needs to become GSSNode* - confirm against the header.
bool GraphStructuredStack::inFrontier(int frontier, int state) {
    // TODO: not yet implemented
    (void)frontier;
    (void)state;
    return false;
}
|
||||
|
||||
// Intended to report whether the given frontier contains no nodes.
// Placeholder: addToFrontier stores nothing yet, so every frontier is
// reported as empty. The explicit return replaces falling off the end of a
// non-void function (undefined behavior).
bool GraphStructuredStack::frontierIsEmpty(int frontier) {
    // TODO: not yet implemented
    (void)frontier;
    return true;
}
|
||||
|
||||
// Intended to report whether the given frontier contains an accepting state.
// Placeholder: nothing is stored yet, so the answer is always false. The
// explicit return replaces falling off the end of a non-void function
// (undefined behavior).
// NOTE(review): RNGLRParser::parseInput calls this as frontierHasAccSt;
// the two spellings need to be reconciled.
bool GraphStructuredStack::frontierHasAccState(int frontier) {
    // TODO: not yet implemented
    (void)frontier;
    return false;
}
|
||||
|
||||
// Intended to collect the nodes reachable from start along paths of the
// given length.
// Placeholder: no edges are stored yet, so an empty heap-allocated vector is
// returned. RNGLRParser::reducer dereferences the result immediately, so a
// null pointer would crash it. The original fell off the end of a non-void
// function, which is undefined behavior.
// NOTE(review): ownership of the returned vector is not established
// anywhere visible - confirm who deletes it.
std::vector<GSSNode*>* GraphStructuredStack::getReachable(GSSNode* start, int length) {
    // TODO: not yet implemented
    (void)start;
    (void)length;
    return new std::vector<GSSNode*>();
}
|
||||
|
||||
// Intended to report whether an edge from start to end exists.
// Placeholder: addEdge stores nothing yet, so the answer is always false.
// The explicit return replaces falling off the end of a non-void function
// (undefined behavior).
bool GraphStructuredStack::hasEdge(GSSNode* start, GSSNode* end) {
    // TODO: not yet implemented
    (void)start;
    (void)end;
    return false;
}
|
||||
|
||||
// Intended to add an edge from start to end.
// Placeholder: does nothing yet.
void GraphStructuredStack::addEdge(GSSNode* start, GSSNode* end) {
    // TODO: not yet implemented
}
|
||||
106
src/NodeTree.cpp
106
src/NodeTree.cpp
@@ -1,106 +0,0 @@
|
||||
#include "NodeTree.h"
|
||||
|
||||
// Definition of the class-wide counter; each constructor copies its current value into the new node's id and then increments it.
int NodeTree::idCounter;
|
||||
|
||||
// Builds a detached node: no parent, no symbol, the placeholder name
// "UnnamedNode", and an id taken from idCounter (which is then incremented).
NodeTree::NodeTree() {
    id = idCounter++;
    name = "UnnamedNode";
    symbol = NULL;
    parent = NULL;
}
|
||||
|
||||
// Builds a detached node (no parent) carrying the given name and symbol,
// with an id taken from idCounter (which is then incremented).
NodeTree::NodeTree(std::string name, Symbol* inSymbol) {
    id = idCounter++;
    parent = NULL;
    symbol = inSymbol;
    this->name = name;
}
|
||||
|
||||
// Empties this node's child list. The child nodes themselves are not
// deleted here; only the stored pointers are dropped.
NodeTree::~NodeTree() {
    this->children.clear();
}
|
||||
|
||||
// Re-parents this node. If it already had a parent, this node is first
// removed from that parent's child list. The new parent pointer is then
// recorded; this node is NOT added to the new parent's children here.
void NodeTree::setParent(NodeTree* parent) {
    NodeTree* previousParent = this->parent;
    if (previousParent != NULL)
        previousParent->removeChild(this);
    this->parent = parent;
}
|
||||
|
||||
// Returns the stored parent pointer (NULL for a node with no parent).
NodeTree* NodeTree::getParent() {
    return this->parent;
}
|
||||
|
||||
// Appends child to this node's child list unless that exact pointer is
// already present. The child's own parent pointer is not touched.
void NodeTree::addChild(NodeTree* child) {
    const bool alreadyPresent = (findChild(child) != -1);
    if (alreadyPresent)
        return;
    children.push_back(child);
}
|
||||
|
||||
// Looks for child (compared by pointer identity) among this node's direct
// children. Returns its index, or -1 when it is not present.
int NodeTree::findChild(NodeTree* child) {
    const int childCount = static_cast<int>(children.size());
    int position = 0;
    while (position < childCount) {
        if (children[position] == child)
            return position;
        ++position;
    }
    return -1;
}
|
||||
|
||||
// Removes the child pointer stored at index from this node's child list.
// The child node itself is not deleted and its parent pointer is not reset.
// An index outside [0, children.size()) is ignored; previously it indexed
// and erased out of range, which is undefined behavior.
void NodeTree::removeChild(int index) {
    if (index < 0 || index >= static_cast<int>(children.size()))
        return;
    // The original nulled the slot before erasing it; that store was dead
    // because the slot is removed immediately afterwards.
    children.erase(children.begin() + index);
}
|
||||
|
||||
// Removes child (matched by pointer identity) from this node's child list,
// if it is present. Does nothing when child is not one of the children.
void NodeTree::removeChild(NodeTree* child) {
    int index = findChild(child);
    // findChild signals "not found" with -1. The original tested against 0,
    // which skipped removal of the first child and, for a missing child,
    // forwarded -1 to the index-based overload.
    if (index != -1) {
        removeChild(index);
    }
}
|
||||
|
||||
int NodeTree::size() {
|
||||
int count = 0;
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
count += children[i]->size();
|
||||
}
|
||||
return 1+count;
|
||||
}
|
||||
|
||||
// Returns the child pointer stored at index. No bounds checking is done.
NodeTree* NodeTree::get(int index) {
    return *(children.begin() + index);
}
|
||||
|
||||
std::string NodeTree::getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
// Replaces this node's name with the given string.
void NodeTree::setName(std::string name) {
    this->name.assign(name);
}
|
||||
|
||||
// Returns the symbol pointer attached to this node (may be NULL).
Symbol* NodeTree::getSymbol() {
    return this->symbol;
}
|
||||
|
||||
// Attaches the given symbol pointer to this node, replacing any previous one.
void NodeTree::setSymbol(Symbol* symbol) {
    (*this).symbol = symbol;
}
|
||||
|
||||
std::string NodeTree::DOTGraphString() {
|
||||
return( "digraph Kraken { \n" + DOTGraphStringHelper() + "}");
|
||||
}
|
||||
|
||||
std::string NodeTree::DOTGraphStringHelper() {
|
||||
std::string ourDOTRelation = "";
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
ourDOTRelation += getDOTName() + " -> " + children[i]->getDOTName() + ";\n" + children[i]->DOTGraphStringHelper();
|
||||
}
|
||||
return(ourDOTRelation);
|
||||
}
|
||||
|
||||
std::string NodeTree::getDOTName() {
|
||||
if (symbol != NULL)
|
||||
return "\"" + name + "-" + symbol->toString() + "_" + intToString(id) + "\""; //Note that terminals already have a quote in the front of their name, so we don't need to add one
|
||||
return "\"" + name + "_" + intToString(id) + "\"";
|
||||
}
|
||||
@@ -74,7 +74,6 @@ void Parser::loadGrammer(std::string grammerInputString) {
|
||||
}
|
||||
|
||||
// Convenience overload: computes the first set of token by delegating to
// the two-argument firstSet with a fresh, empty avoid list.
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
    std::vector<Symbol*> visited;
    return firstSet(token, visited);
}
|
||||
@@ -84,11 +83,8 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token, std::vector<Symbol*> &avoi
|
||||
for (std::vector<Symbol*>::size_type i = 0; i < avoidList.size(); i++)
|
||||
if (*(avoidList[i]) == *token) {
|
||||
return new std::vector<Symbol*>();
|
||||
//std::cout << "Avoiding firstSet for " << token->toString() << std::endl;
|
||||
}
|
||||
avoidList.push_back(token);
|
||||
//std::cout << "Cpx first set for " << token->toString() << std::endl;
|
||||
//std::cout << "Doing first set for " << token->toString() << std::endl;
|
||||
std::vector<Symbol*>* first = new std::vector<Symbol*>();
|
||||
//First, if the symbol is a terminal, then its first set is just itself.
|
||||
if (token->isTerminal()) {
|
||||
@@ -314,7 +310,7 @@ std::string Parser::tableToString() {
|
||||
return table.toString();
|
||||
}
|
||||
|
||||
NodeTree* Parser::parseInput(std::string inputString) {
|
||||
NodeTree<Symbol*>* Parser::parseInput(std::string inputString) {
|
||||
lexer.setInput(inputString);
|
||||
Symbol* token = lexer.next();
|
||||
ParseAction* action;
|
||||
@@ -370,11 +366,11 @@ NodeTree* Parser::parseInput(std::string inputString) {
|
||||
}
|
||||
}
|
||||
|
||||
NodeTree* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols) {
|
||||
NodeTree* newTree = new NodeTree(newSymbol->getName(), newSymbol);
|
||||
NodeTree<Symbol*>* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols) {
|
||||
NodeTree<Symbol*>* newTree = new NodeTree<Symbol*>(newSymbol->getName(), newSymbol);
|
||||
for (std::vector<Symbol*>::size_type i = 0; i < symbols.size(); i++) {
|
||||
if (symbols[i]->isTerminal())
|
||||
newTree->addChild(new NodeTree(symbols[i]->getName(), symbols[i]));
|
||||
newTree->addChild(new NodeTree<Symbol*>(symbols[i]->getName(), symbols[i]));
|
||||
else
|
||||
newTree->addChild(symbols[i]->getSubTree());
|
||||
}
|
||||
|
||||
116
src/RNGLRParser.cpp
Normal file
116
src/RNGLRParser.cpp
Normal file
@@ -0,0 +1,116 @@
|
||||
|
||||
RNGLRParser::parseInput(std::string inputString) {
|
||||
|
||||
//Check for no tokens
|
||||
if (inputString == "") {
|
||||
if (table.get(0,EOFSymbol)->action == ParseAction::REDUCE)
|
||||
std::cout << "Accepted!" << std::endl;
|
||||
else
|
||||
std::cout << "Rejected, no input (with no accepting state)" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
lexer.setInput(inputString);
|
||||
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
|
||||
//It could be converted to on-line later.
|
||||
Symbol* currentToken = lexer.next();
|
||||
input.push_back(currentToken);
|
||||
while (*currentToken != *EOFToken) {
|
||||
currentToken = lexer.next();
|
||||
input.push_back(currentToken);
|
||||
}
|
||||
|
||||
//Frontier 0, new node with state 0
|
||||
GSSNode* v0 = gss.newNode(0);
|
||||
gss.addToFrontier(0,v0);
|
||||
|
||||
std::vector<ParseAction*> firstActions = table.get(0, input[0]);
|
||||
for (std::vector<ParseAction*>::size_type i = 0; i < firstActions.size(); i++) {
|
||||
if (firstActions[i]->action == ParseAction::SHIFT)
|
||||
toShift.push_back(std::make_pair(v0,firstActions[i]->toState()));
|
||||
else if (firstActions[i]->action == ParseAction::REDUCE && firstActions[i]->reduceRule->getRightSide()->size() == 0) {
|
||||
toReduce.push_back(std::make_pair(std::make_pair(v0, firstActions[i]->reduceRule->getLeftSide()), 0));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < input.size(); i++) {
|
||||
if (gss.frontierIsEmpty(i))
|
||||
break;
|
||||
while (toReduce.size() != 0)
|
||||
reducer(i);
|
||||
shifter(i);
|
||||
}
|
||||
if (gss.frontierHasAccSt(input.size()-1))
|
||||
std::cout << "Accepted!" << std::endl;
|
||||
else
|
||||
std::cout << "Rejected!" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
RNGLRParser::reducer(int i) {
|
||||
std::pair< std::pair<GSSNode*, Symbol*>, int > reduction = toReduce.front();
|
||||
int pathLength = reduction.second > 0 : reduction.second -1 ? 0;
|
||||
std::vector<GSSNode*>* reachable = gss.getReachable(reduction.first.first, pathLength);
|
||||
for (std::vector<GSSNode*>::size_type j = 0; j < reachable->size(); j++) {
|
||||
GSSNode* currentReached = (*reachable)[j];
|
||||
int toState = table.getShift(currentReached->state(), reduction.first.second);
|
||||
GSSNode* toStateNode = gss.inFrontier(i, toState);
|
||||
if (toStateNode) {
|
||||
if (!gss.hasEdge(toStateNode, currentReached)) {
|
||||
gss.addEdge(toStateNode, currentReached);
|
||||
if (reduction.second != 0) {
|
||||
//Do all non null reductions
|
||||
}
|
||||
}
|
||||
} else {
|
||||
toStateNode = gss.newNode(toState);
|
||||
gss.addToFrontier(i, toStateNode);
|
||||
gss.addEdge(toStateNode, currentReached);
|
||||
|
||||
std::vector<ParseAction*> actions = table.get(toState, input[i+1]);
|
||||
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
|
||||
//Shift
|
||||
if (actions[k]->action == ParseAction::SHIFT)
|
||||
nextShifts.push_back(std::make_pair(toStateNode, actions[k]->shiftState));
|
||||
else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule()->size() != 0)
|
||||
toReduce.push_back(std::make_pair(std::make_pair(currentReached, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->size()));
|
||||
else (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule()->size() == 0)
|
||||
toReduce.push_back(std::make_pair(std::make_pair(toStateNode, actions[k]->reduceRule->getLeftSide()), actions[k]->reduceRule->size()));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RNGLRParser::shifter(int i) {
|
||||
if (i != input.length()-1) {
|
||||
std::queue<ParseAction*> nextShifts;
|
||||
while (!toShift.empty()) {
|
||||
std::pair<GSSNode*, int> shift = toShift.front();
|
||||
GSSNode* shiftTo = gss.inFrontier(i+1, shift.second);
|
||||
if (shiftTo) {
|
||||
gss.addEdge(shiftTo, shift.first);
|
||||
std::vector<ParseAction*> actions = table.get(shift.second, input[i+2]);
|
||||
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
|
||||
if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule->size() != 0)
|
||||
toReduce.push_back(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size()));
|
||||
}
|
||||
} else {
|
||||
shiftTo = gss.newNode(shift.second);
|
||||
gss.addToFrontier(i+1, shiftTo);
|
||||
gss.addEdge(shiftTo, shift.first);
|
||||
std::vector<ParseAction*> actions = table.get(shift.toState(), input[i+2]);
|
||||
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
|
||||
//Shift
|
||||
if (actions[j]->action == ParseAction::SHIFT)
|
||||
nextShifts.push_back(std::make_pair(shiftTo, actions[j]->shiftState));
|
||||
else if (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule()->size() != 0)
|
||||
toReduce.push_back(std::make_pair(std::make_pair(shift.first, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size()));
|
||||
else (actions[j]->action == ParseAction::REDUCE && actions[j]->reduceRule()->size() == 0)
|
||||
toReduce.push_back(std::make_pair(std::make_pair(shiftTo, actions[j]->reduceRule->getLeftSide()), actions[j]->reduceRule->size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
toShift = nextShifts;
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ void RegEx::construct() {
|
||||
switch (pattern[i]) {
|
||||
case '*':
|
||||
{
|
||||
std::cout << "Star at " << i << " in " << pattern << std::endl;
|
||||
//std::cout << "Star at " << i << " in " << pattern << std::endl;
|
||||
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
|
||||
// currentStates[j]->addNext(currentStates[k]);
|
||||
@@ -28,7 +28,7 @@ void RegEx::construct() {
|
||||
break;
|
||||
case '+':
|
||||
{
|
||||
std::cout << "Plus at " << i << " in " << pattern << std::endl;
|
||||
//std::cout << "Plus at " << i << " in " << pattern << std::endl;
|
||||
//OtherThingy
|
||||
//current->addNext(current);
|
||||
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
@@ -39,14 +39,14 @@ void RegEx::construct() {
|
||||
break;
|
||||
case '?':
|
||||
{
|
||||
std::cout << "Question at " << i << " in " << pattern << std::endl;
|
||||
//std::cout << "Question at " << i << " in " << pattern << std::endl;
|
||||
//add all previous states to current states to enable skipping over the questioned item
|
||||
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
{
|
||||
std::cout << "Alternation at " << i << " in " << pattern << std::endl;
|
||||
//std::cout << "Alternation at " << i << " in " << pattern << std::endl;
|
||||
//alternation
|
||||
alternating = true;
|
||||
}
|
||||
@@ -54,7 +54,7 @@ void RegEx::construct() {
|
||||
break;
|
||||
case '(':
|
||||
{
|
||||
std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
|
||||
//std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
|
||||
//perentheses
|
||||
//Create a peren node with an inner empty node
|
||||
RegExState* next = new RegExState(new RegExState());
|
||||
@@ -88,13 +88,13 @@ void RegEx::construct() {
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next->getInner());
|
||||
}
|
||||
std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
|
||||
//std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
|
||||
}
|
||||
break;
|
||||
|
||||
case ')':
|
||||
{
|
||||
std::cout << "End peren at " << i << " in " << pattern << std::endl;
|
||||
//std::cout << "End peren at " << i << " in " << pattern << std::endl;
|
||||
//perentheses
|
||||
//Pop off the states that will now be the previous states and the peren node which will now be the current node
|
||||
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
|
||||
@@ -113,19 +113,19 @@ void RegEx::construct() {
|
||||
case '\\':
|
||||
{
|
||||
i++;
|
||||
std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
|
||||
//std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
|
||||
//Ahh, it's escaping a special character, so fall through to the default.
|
||||
}
|
||||
default:
|
||||
{
|
||||
std::cout << "Regular" << std::endl;
|
||||
//std::cout << "Regular" << std::endl;
|
||||
//Ahh, it's regular
|
||||
RegExState* next = new RegExState(pattern[i]);
|
||||
//If we're alternating, add next as the next for each previous state, and add self to currentStates
|
||||
if (alternating) {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
|
||||
previousStates[j]->addNext(next);
|
||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
|
||||
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
|
||||
}
|
||||
currentStates.push_back(next);
|
||||
alternating = false;
|
||||
@@ -134,7 +134,7 @@ void RegEx::construct() {
|
||||
//previous states, and add ourself as the new current state.
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||
currentStates[j]->addNext(next);
|
||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||
}
|
||||
previousStates.clear();
|
||||
previousStates = currentStates;
|
||||
@@ -150,7 +150,7 @@ void RegEx::construct() {
|
||||
}
|
||||
|
||||
void RegEx::deperenthesize() {
|
||||
std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
|
||||
//std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
|
||||
|
||||
//Now go through and expand the peren nodes to regular nodes
|
||||
std::vector<RegExState*> processedStates;
|
||||
@@ -224,7 +224,7 @@ void RegEx::deperenthesize() {
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
|
||||
//std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
|
||||
}
|
||||
|
||||
RegEx::~RegEx() {
|
||||
|
||||
@@ -14,7 +14,7 @@ Symbol::Symbol(std::string name, bool isTerminal, std::string value) {
|
||||
this->value = value;
|
||||
}
|
||||
|
||||
Symbol::Symbol(std::string name, bool isTerminal, NodeTree* tree) {
|
||||
Symbol::Symbol(std::string name, bool isTerminal, NodeTree<Symbol*>* tree) {
|
||||
this->name = name;
|
||||
this->terminal = isTerminal;
|
||||
this->subTree = tree;
|
||||
@@ -40,11 +40,11 @@ Symbol* Symbol::clone() {
|
||||
return new Symbol(name, terminal, subTree);
|
||||
}
|
||||
|
||||
void Symbol::setSubTree(NodeTree* tree) {
|
||||
void Symbol::setSubTree(NodeTree<Symbol*>* tree) {
|
||||
subTree = tree;
|
||||
}
|
||||
|
||||
NodeTree* Symbol::getSubTree() {
|
||||
NodeTree<Symbol*>* Symbol::getSubTree() {
|
||||
return subTree;
|
||||
}
|
||||
|
||||
|
||||
@@ -112,4 +112,4 @@ std::string Table::toString() {
|
||||
concat += "\n";
|
||||
}
|
||||
return(concat);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user