The problem was actually in the grammar; fixed it. Also made identical rules that differ only in lookahead merge. Just started on creating parse trees. Stopping for the night.

This commit is contained in:
Nathan Braswell
2013-08-06 01:49:45 -04:00
parent 9460bacf1c
commit 680d978dcb
9 changed files with 136 additions and 40 deletions

View File

@@ -28,9 +28,11 @@ class NodeTree {
std::vector<NodeTree<T>*> getParents();
void addChild(NodeTree<T>* child);
void addChildren(std::vector<NodeTree<T>*>* children);
int findChild(NodeTree<T>* child);
void removeChild(NodeTree<T>* child);
void removeChild(int index);
void clearChildren();
std::vector<NodeTree<T>*> getChildren();
NodeTree<T>* get(int index);
@@ -111,6 +113,12 @@ void NodeTree<T>::addChild(NodeTree<T>* child) {
children.push_back(child);
}
//Adds every node in the given vector as a child of this node, in order,
//by calling addChild once per element.
//Note: the parameter is named like the member vector and hides it inside this function;
//the member is only touched through addChild.
template<class T>
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) {
	//size_type depends on T here, so it must be introduced with typename
	for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++)
		addChild((*children)[i]);
}
template<class T>
int NodeTree<T>::findChild(NodeTree<T>* child) {
for (int i = 0; i < children.size(); i++) {
@@ -135,6 +143,13 @@ void NodeTree<T>::removeChild(NodeTree<T>* child) {
}
}
//Empties this node's list of children.
//Only the stored pointers are dropped; the child nodes themselves are not deleted here.
template<class T>
void NodeTree<T>::clearChildren() {
	//clear() alone discards every stored pointer; nulling each slot first is unnecessary
	children.clear();
}
template<class T>
std::vector<NodeTree<T>*> NodeTree<T>::getChildren() {
return children;

View File

@@ -37,6 +37,7 @@ class ParseRule {
bool isAtEnd();
void setLookahead(std::vector<Symbol*>* lookahead);
void addLookahead(std::vector<Symbol*>* lookahead);
std::vector<Symbol*>* getLookahead();
std::string toString();

View File

@@ -1,3 +1,6 @@
#ifndef RNGLRPARSER_H
#define RNGLRPARSER_H
#include <iostream>
#include <queue>
#include "Parser.h"
@@ -8,16 +11,30 @@ class RNGLRParser: public Parser {
RNGLRParser();
~RNGLRParser();
NodeTree<Symbol*>* parseInput(std::string inputString);
private:
void reducer(int i);
void shifter(int i);
//Attaches children to parent; the vector is taken by pointer so this declaration matches the out-of-class definition
void addChildren(NodeTree<Symbol*>* parent, std::vector<NodeTree<Symbol*>*>* children, int nullablePartsIndex);
void addStates(std::vector< State* >* stateSets, State* state);
bool reducesToNull(ParseRule* rule);
bool reducesToNull(ParseRule* rule, std::vector<Symbol*> avoidList);
private:
bool belongsToFamily(NodeTree<Symbol*>* node, std::vector<NodeTree<Symbol*>*>* nodes);
bool arePacked(std::vector<NodeTree<Symbol*>*>* nodes);
bool isPacked(NodeTree<Symbol*>* node);
//Records in packedMap whether the given node is a packed node
void setPacked(NodeTree<Symbol*>* node, bool isPacked);
std::vector<Symbol*> input;
GraphStructuredStack gss;
//start node, lefthand side of the reduction, reduction length
std::queue<std::pair< std::pair<NodeTree<int>*, Symbol*>, int > > toReduce;
//Node coming from, state going to
std::queue< std::pair<NodeTree<int>*, int> > toShift;
std::vector<NodeTree<Symbol*>*> nullableParts;
std::map<NodeTree<Symbol*>*, bool> packedMap;
};
#endif

View File

@@ -54,7 +54,7 @@ int main(int argc, char* argv[]) {
//std::cout << "Doing stateSetToString from Main" << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
std::cout << parser.stateSetToString() << std::endl;
//std::cout << "finished stateSetToString from Main" << std::endl;
std::cout << "finished stateSetToString from Main" << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl;
std::cout << parser.tableToString() << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl;

View File

@@ -31,7 +31,7 @@ Symbol* Lexer::next() {
RegEx* longestRegEx = NULL;
std::string remainingString = input.substr(currentPosition,input.length()-1);
for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
//std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
int currentMatch = regExs[i]->longMatch(remainingString);
if (currentMatch > longestMatch) {
longestMatch = currentMatch;
@@ -39,9 +39,10 @@ Symbol* Lexer::next() {
}
}
if (longestRegEx != NULL) {
std::string eatenString = input.substr(currentPosition, longestMatch+1);
currentPosition += longestMatch + 1;
std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
return new Symbol(longestRegEx->getPattern(), true);
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
return new Symbol(longestRegEx->getPattern(), true, eatenString);
} else {
std::cout << "Found no applicable regex" << std::endl;
std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;

View File

@@ -89,6 +89,20 @@ void ParseRule::setLookahead(std::vector<Symbol*>* lookahead) {
this->lookahead = lookahead;
}
//Merges the given lookahead symbols into this rule's own lookahead vector.
//A symbol is appended only if no symbol already stored compares equal to it (by value, via operator==).
//The pointers themselves are shared with the incoming vector; nothing is copied or cloned.
void ParseRule::addLookahead(std::vector<Symbol*>* lookahead) {
	std::vector<Symbol*>* existing = this->lookahead;
	for (std::vector<Symbol*>::size_type incoming = 0; incoming < lookahead->size(); incoming++) {
		Symbol* candidate = (*lookahead)[incoming];
		//Scan for the first stored symbol equal to the candidate
		std::vector<Symbol*>::size_type pos = 0;
		while (pos < existing->size() && !(*candidate == *((*existing)[pos])))
			pos++;
		//Ran off the end, so nothing matched: the candidate is new
		if (pos == existing->size())
			existing->push_back(candidate);
	}
}
std::vector<Symbol*>* ParseRule::getLookahead() {
return lookahead;
}

View File

@@ -78,6 +78,35 @@ void Parser::loadGrammer(std::string grammerInputString) {
std::cout << loadedGrammer[i]->toString() << std::endl;
}
//Builds the parser's state sets.
//Seeds stateSets with state 0, whose basis is a clone of the first loaded grammar rule
//with EOF as its only lookahead, then runs closure and addStates on every state in index order.
void Parser::createStateSet() {
std::cout << "Begining creation of stateSet" << std::endl;
//First state has no parents
//Set the first state's basis to be the goal rule with lookahead EOF
ParseRule* goalRule = loadedGrammer[0]->clone();
//The lookahead vector is heap allocated and handed to the rule by pointer
std::vector<Symbol*>* goalRuleLookahead = new std::vector<Symbol*>();
goalRuleLookahead->push_back(EOFSymbol);
goalRule->setLookahead(goalRuleLookahead);
stateSets.push_back( new State(0, goalRule));
//std::cout << "Begining for main set for loop" << std::endl;
//size() is re-read on every pass; stateSets is handed to addStates by pointer, so presumably
//newly added states are appended and picked up by later iterations - confirm against addStates
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
//closure
closure(stateSets[i]);
//Add the new states
addStates(&stateSets, stateSets[i]);
}
//NOTE(review): removes the table entry for state 1 on EOF; why state 1 is special is not visible here - confirm
table.remove(1, EOFSymbol);
}
//Looks up a state by value: returns the index of the first entry of stateSets
//that compares equal to *state (operator== on State), or -1 when there is none.
int Parser::stateNum(State* state) {
	std::vector<State*>::size_type index = 0;
	while (index < stateSets.size()) {
		if (*(stateSets[index]) == *state)
			return index;
		index++;
	}
	return -1;
}
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
std::vector<Symbol*> avoidList;
return firstSet(token, avoidList);
@@ -131,35 +160,6 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token, std::vector<Symbol*> avoid
return(first);
}
//Builds the parser's state sets.
//Seeds stateSets with state 0, whose basis is a clone of the first loaded grammar rule
//with EOF as its only lookahead, then runs closure and addStates on every state in index order.
void Parser::createStateSet() {
std::cout << "Begining creation of stateSet" << std::endl;
//First state has no parents
//Set the first state's basis to be the goal rule with lookahead EOF
ParseRule* goalRule = loadedGrammer[0]->clone();
//The lookahead vector is heap allocated and handed to the rule by pointer
std::vector<Symbol*>* goalRuleLookahead = new std::vector<Symbol*>();
goalRuleLookahead->push_back(EOFSymbol);
goalRule->setLookahead(goalRuleLookahead);
stateSets.push_back( new State(0, goalRule));
//std::cout << "Begining for main set for loop" << std::endl;
//size() is re-read on every pass; stateSets is handed to addStates by pointer, so presumably
//newly added states are appended and picked up by later iterations - confirm against addStates
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
//closure
closure(stateSets[i]);
//Add the new states
addStates(&stateSets, stateSets[i]);
}
//NOTE(review): removes the table entry for state 1 on EOF; why state 1 is special is not visible here - confirm
table.remove(1, EOFSymbol);
}
//Looks up a state by value: returns the index of the first entry of stateSets
//that compares equal to *state (operator== on State), or -1 when there is none.
int Parser::stateNum(State* state) {
	std::vector<State*>::size_type index = 0;
	while (index < stateSets.size()) {
		if (*(stateSets[index]) == *state)
			return index;
		index++;
	}
	return -1;
}
//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
@@ -181,7 +181,7 @@ std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
}
}
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
delete symbolFirstSet;
//delete symbolFirstSet;
rule->advancePointer();
}
if (rule->isAtEnd()) {
@@ -192,8 +192,10 @@ std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
for (std::vector<Symbol*>::size_type i = 0; i < followSet->size(); i++) {
bool alreadyIn = false;
for (std::vector<Symbol*>::size_type j = 0; j < followSetReturn->size(); j++)
if (*((*followSet)[i]) == *((*followSetReturn)[j]))
if (*((*followSet)[i]) == *((*followSetReturn)[j])) {
alreadyIn = true;
break;
}
if (!alreadyIn)
followSetReturn->push_back((*followSet)[i]);
}
@@ -219,7 +221,9 @@ void Parser::closure(State* state) {
//Check to make sure not already in
bool isAlreadyInState = false;
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal->size(); k++) {
if (*((*stateTotal)[k]) == *currentGramRule) {
if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) {
std::cout << (*stateTotal)[k]->toString() << std::endl;
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
isAlreadyInState = true;
break;
}

View File

@@ -112,6 +112,7 @@ void RNGLRParser::reducer(int i) {
gss.addEdge(toStateNode, currentReached);
if (reduction.second != 0) {
//Do all non null reduction
std::cout << "Checking for non-null reductions in states that already existed" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++)
if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0)
@@ -123,9 +124,10 @@ void RNGLRParser::reducer(int i) {
gss.addToFrontier(i, toStateNode);
gss.addEdge(toStateNode, currentReached);
std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
//Shift
std::cout << "Action is " << actions[k]->toString() << std::endl;
if (actions[k]->action == ParseAction::SHIFT)
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState));
else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0)
@@ -175,6 +177,48 @@ void RNGLRParser::shifter(int i) {
}
}
//Attaches a list of child nodes to parent as one alternative set of children.
//  parent              node receiving the children
//  children            nodes to attach; this vector is modified when nullablePartsIndex is non-zero
//  nullablePartsIndex  if non-zero, nullableParts[nullablePartsIndex] is appended to children first
//Nothing is attached when belongsToFamily reports true for (parent, children).
//A parent without children takes the children directly. Otherwise every alternative lives under its
//own node marked as packed: the current children are first moved under a new packed node unless they
//are all packed already, and the new children are placed under another new packed node.
void RNGLRParser::addChildren(NodeTree<Symbol*>* parent, std::vector<NodeTree<Symbol*>*>* children, int nullablePartsIndex) {
	if (nullablePartsIndex != 0)
		children->push_back(nullableParts[nullablePartsIndex]);
	if (belongsToFamily(parent, children))
		return;

	//getChildren returns the vector by value, so keep one named copy; arePacked and
	//NodeTree::addChildren both take a pointer, which cannot be formed from a temporary
	std::vector<NodeTree<Symbol*>*> existingChildren = parent->getChildren();
	if (existingChildren.size() == 0) {
		parent->addChildren(children);
		return;
	}

	if (!arePacked(&existingChildren)) {
		//First ambiguity on this parent: move the current children under a packed node
		NodeTree<Symbol*>* subParent = new NodeTree<Symbol*>();
		setPacked(subParent, true);
		subParent->addChildren(&existingChildren);
		parent->clearChildren();
		parent->addChild(subParent);
	}
	//The new alternative gets its own packed node
	NodeTree<Symbol*>* t = new NodeTree<Symbol*>();
	setPacked(t, true);
	parent->addChild(t);
	t->addChildren(children);
}
//Presumably meant to report whether nodes is already recorded as a set of children of node
//(addChildren skips the insertion when this returns true) - TODO confirm and implement.
//Until then it always answers false so the function has a defined return value;
//the consequence is that addChildren never skips an insertion.
bool RNGLRParser::belongsToFamily(NodeTree<Symbol*>* node, std::vector<NodeTree<Symbol*>*>* nodes) {
	//TODO: compare nodes against the children already attached to node
	return false;
}
//Returns true only if every node in the given vector is marked as packed in packedMap.
//An empty vector yields true.
//Note: looking a node up with packedMap[...] inserts it as not packed if it was never recorded.
bool RNGLRParser::arePacked(std::vector<NodeTree<Symbol*>*>* nodes) {
	for (std::vector<NodeTree<Symbol*>*>::size_type i = 0; i < nodes->size(); i++) {
		//One node that is not packed settles the answer
		if (!packedMap[(*nodes)[i]])
			return false;
	}
	return true;
}
//Reports the packed flag stored for node in packedMap.
//A node that was never recorded is inserted by the lookup and reported as not packed.
bool RNGLRParser::isPacked(NodeTree<Symbol*>* node) {
	const bool packed = packedMap[node];
	return packed;
}
//Stores the packed flag for node in packedMap, creating the entry if needed.
//The parameter isPacked hides the member function of the same name inside this body.
void RNGLRParser::setPacked(NodeTree<Symbol*>* node, bool isPacked) {
	bool& flag = packedMap[node];
	flag = isPacked;
}
//Have to use own add states function in order to construct RN table instead of LALR table
void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) {
std::vector< State* > newStates;

View File

@@ -243,9 +243,9 @@ int RegEx::longMatch(std::string stringToMatch) {
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
if (currentStates[j]->isGoal()) {
lastMatch = i-1;
std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl;
//std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl;
} else {
std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <<std::endl;
//std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <<std::endl;
}
std::vector<RegExState*>* addStates = currentStates[j]->advance(stringToMatch.at(i));
nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());