The problem was actually in the grammar; fixed it. Identical rules that differ only in lookahead are now merged. Just started on creating parse trees. Stopping for the night.
This commit is contained in:
@@ -28,9 +28,11 @@ class NodeTree {
|
||||
std::vector<NodeTree<T>*> getParents();
|
||||
|
||||
void addChild(NodeTree<T>* child);
|
||||
void addChildren(std::vector<NodeTree<T>*>* children);
|
||||
int findChild(NodeTree<T>* child);
|
||||
void removeChild(NodeTree<T>* child);
|
||||
void removeChild(int index);
|
||||
void clearChildren();
|
||||
std::vector<NodeTree<T>*> getChildren();
|
||||
|
||||
NodeTree<T>* get(int index);
|
||||
@@ -111,6 +113,12 @@ void NodeTree<T>::addChild(NodeTree<T>* child) {
|
||||
children.push_back(child);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) {
|
||||
for (std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++)
|
||||
addChild((*children)[i]);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
int NodeTree<T>::findChild(NodeTree<T>* child) {
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
@@ -135,6 +143,13 @@ void NodeTree<T>::removeChild(NodeTree<T>* child) {
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::clearChildren() {
|
||||
for (std::vector<T>::size_type i = 0; i < children.size(); i++)
|
||||
children[i] = NULL;
|
||||
children.clear();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
std::vector<NodeTree<T>*> NodeTree<T>::getChildren() {
|
||||
return children;
|
||||
|
||||
@@ -37,6 +37,7 @@ class ParseRule {
|
||||
bool isAtEnd();
|
||||
|
||||
void setLookahead(std::vector<Symbol*>* lookahead);
|
||||
void addLookahead(std::vector<Symbol*>* lookahead);
|
||||
std::vector<Symbol*>* getLookahead();
|
||||
|
||||
std::string toString();
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
#ifndef RNGLRPARSER_H
|
||||
#define RNGLRPARSER_H
|
||||
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include "Parser.h"
|
||||
@@ -8,16 +11,30 @@ class RNGLRParser: public Parser {
|
||||
RNGLRParser();
|
||||
~RNGLRParser();
|
||||
NodeTree<Symbol*>* parseInput(std::string inputString);
|
||||
|
||||
private:
|
||||
void reducer(int i);
|
||||
void shifter(int i);
|
||||
// Takes the child list by pointer so the declaration matches the definition in the .cpp file.
void addChildren(NodeTree<Symbol*>* parent, std::vector<NodeTree<Symbol*>*>* children, int nullablePartsIndex);
|
||||
|
||||
void addStates(std::vector< State* >* stateSets, State* state);
|
||||
bool reducesToNull(ParseRule* rule);
|
||||
bool reducesToNull(ParseRule* rule, std::vector<Symbol*> avoidList);
|
||||
private:
|
||||
|
||||
bool belongsToFamily(NodeTree<Symbol*>* node, std::vector<NodeTree<Symbol*>*>* nodes);
|
||||
bool arePacked(std::vector<NodeTree<Symbol*>*>* nodes);
|
||||
bool isPacked(NodeTree<Symbol*>* node);
|
||||
// Records in packedMap whether the given node is a packed node.
void setPacked(NodeTree<Symbol*>* node, bool isPacked);
|
||||
|
||||
std::vector<Symbol*> input;
|
||||
GraphStructuredStack gss;
|
||||
//start node, lefthand side of the reduction, reduction length
|
||||
std::queue<std::pair< std::pair<NodeTree<int>*, Symbol*>, int > > toReduce;
|
||||
//Node coming from, state going to
|
||||
std::queue< std::pair<NodeTree<int>*, int> > toShift;
|
||||
|
||||
std::vector<NodeTree<Symbol*>*> nullableParts;
|
||||
std::map<NodeTree<Symbol*>*, bool> packedMap;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
2
main.cpp
2
main.cpp
@@ -54,7 +54,7 @@ int main(int argc, char* argv[]) {
|
||||
//std::cout << "Doing stateSetToString from Main" << std::endl;
|
||||
std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
|
||||
std::cout << parser.stateSetToString() << std::endl;
|
||||
//std::cout << "finished stateSetToString from Main" << std::endl;
|
||||
std::cout << "finished stateSetToString from Main" << std::endl;
|
||||
std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl;
|
||||
std::cout << parser.tableToString() << std::endl;
|
||||
std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl;
|
||||
|
||||
@@ -31,7 +31,7 @@ Symbol* Lexer::next() {
|
||||
RegEx* longestRegEx = NULL;
|
||||
std::string remainingString = input.substr(currentPosition,input.length()-1);
|
||||
for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
|
||||
std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
|
||||
//std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
|
||||
int currentMatch = regExs[i]->longMatch(remainingString);
|
||||
if (currentMatch > longestMatch) {
|
||||
longestMatch = currentMatch;
|
||||
@@ -39,9 +39,10 @@ Symbol* Lexer::next() {
|
||||
}
|
||||
}
|
||||
if (longestRegEx != NULL) {
|
||||
std::string eatenString = input.substr(currentPosition, longestMatch+1);
|
||||
currentPosition += longestMatch + 1;
|
||||
std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
|
||||
return new Symbol(longestRegEx->getPattern(), true);
|
||||
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
|
||||
return new Symbol(longestRegEx->getPattern(), true, eatenString);
|
||||
} else {
|
||||
std::cout << "Found no applicable regex" << std::endl;
|
||||
std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;
|
||||
|
||||
@@ -89,6 +89,20 @@ void ParseRule::setLookahead(std::vector<Symbol*>* lookahead) {
|
||||
this->lookahead = lookahead;
|
||||
}
|
||||
|
||||
// Merges the given lookahead symbols into this rule's own lookahead list.
//
// A symbol is appended only when no symbol already in this rule's list
// compares equal to it (by Symbol's operator==), so the list stays free of
// duplicates. The Symbol pointers are shared with the source list, not copied.
//
// lookahead: the symbols to merge in; this parameter shadows the member of
//            the same name, hence the explicit this-> below.
void ParseRule::addLookahead(std::vector<Symbol*>* lookahead) {
    typedef std::vector<Symbol*>::size_type Index;
    for (Index incoming = 0; incoming < lookahead->size(); incoming++) {
        // Scan our own list until we hit an equal symbol or run off the end.
        Index existing = 0;
        while (existing < this->lookahead->size()
               && !(*((*lookahead)[incoming]) == *((*(this->lookahead))[existing])))
            existing++;
        // Running off the end means the symbol is new to this rule.
        if (existing == this->lookahead->size())
            this->lookahead->push_back((*lookahead)[incoming]);
    }
}
|
||||
|
||||
std::vector<Symbol*>* ParseRule::getLookahead() {
|
||||
return lookahead;
|
||||
}
|
||||
|
||||
@@ -78,6 +78,35 @@ void Parser::loadGrammer(std::string grammerInputString) {
|
||||
std::cout << loadedGrammer[i]->toString() << std::endl;
|
||||
}
|
||||
|
||||
void Parser::createStateSet() {
|
||||
std::cout << "Begining creation of stateSet" << std::endl;
|
||||
//First state has no parents
|
||||
|
||||
//Set the first state's basis to be the goal rule with lookahead EOF
|
||||
ParseRule* goalRule = loadedGrammer[0]->clone();
|
||||
std::vector<Symbol*>* goalRuleLookahead = new std::vector<Symbol*>();
|
||||
goalRuleLookahead->push_back(EOFSymbol);
|
||||
goalRule->setLookahead(goalRuleLookahead);
|
||||
stateSets.push_back( new State(0, goalRule));
|
||||
//std::cout << "Begining for main set for loop" << std::endl;
|
||||
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
|
||||
//closure
|
||||
closure(stateSets[i]);
|
||||
//Add the new states
|
||||
addStates(&stateSets, stateSets[i]);
|
||||
}
|
||||
table.remove(1, EOFSymbol);
|
||||
}
|
||||
|
||||
// Looks up the position of a state within stateSets.
//
// States are compared by value (State's operator==), not by pointer.
// Returns the index of the first equal state, or -1 when there is none.
int Parser::stateNum(State* state) {
    for (std::vector<State*>::iterator it = stateSets.begin(); it != stateSets.end(); ++it) {
        if (**it == *state)
            return it - stateSets.begin();
    }
    return -1;
}
|
||||
|
||||
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
|
||||
std::vector<Symbol*> avoidList;
|
||||
return firstSet(token, avoidList);
|
||||
@@ -131,35 +160,6 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token, std::vector<Symbol*> avoid
|
||||
return(first);
|
||||
}
|
||||
|
||||
void Parser::createStateSet() {
|
||||
std::cout << "Begining creation of stateSet" << std::endl;
|
||||
//First state has no parents
|
||||
|
||||
//Set the first state's basis to be the goal rule with lookahead EOF
|
||||
ParseRule* goalRule = loadedGrammer[0]->clone();
|
||||
std::vector<Symbol*>* goalRuleLookahead = new std::vector<Symbol*>();
|
||||
goalRuleLookahead->push_back(EOFSymbol);
|
||||
goalRule->setLookahead(goalRuleLookahead);
|
||||
stateSets.push_back( new State(0, goalRule));
|
||||
//std::cout << "Begining for main set for loop" << std::endl;
|
||||
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
|
||||
//closure
|
||||
closure(stateSets[i]);
|
||||
//Add the new states
|
||||
addStates(&stateSets, stateSets[i]);
|
||||
}
|
||||
table.remove(1, EOFSymbol);
|
||||
}
|
||||
|
||||
// Looks up the position of a state within stateSets.
//
// States are compared by value (State's operator==), not by pointer.
// Returns the index of the first equal state, or -1 when there is none.
int Parser::stateNum(State* state) {
    for (std::vector<State*>::iterator it = stateSets.begin(); it != stateSets.end(); ++it) {
        if (**it == *state)
            return it - stateSets.begin();
    }
    return -1;
}
|
||||
|
||||
//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
|
||||
std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
|
||||
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
|
||||
@@ -181,7 +181,7 @@ std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
|
||||
}
|
||||
}
|
||||
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
|
||||
delete symbolFirstSet;
|
||||
//delete symbolFirstSet;
|
||||
rule->advancePointer();
|
||||
}
|
||||
if (rule->isAtEnd()) {
|
||||
@@ -192,8 +192,10 @@ std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
|
||||
for (std::vector<Symbol*>::size_type i = 0; i < followSet->size(); i++) {
|
||||
bool alreadyIn = false;
|
||||
for (std::vector<Symbol*>::size_type j = 0; j < followSetReturn->size(); j++)
|
||||
if (*((*followSet)[i]) == *((*followSetReturn)[j]))
|
||||
if (*((*followSet)[i]) == *((*followSetReturn)[j])) {
|
||||
alreadyIn = true;
|
||||
break;
|
||||
}
|
||||
if (!alreadyIn)
|
||||
followSetReturn->push_back((*followSet)[i]);
|
||||
}
|
||||
@@ -219,7 +221,9 @@ void Parser::closure(State* state) {
|
||||
//Check to make sure not already in
|
||||
bool isAlreadyInState = false;
|
||||
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal->size(); k++) {
|
||||
if (*((*stateTotal)[k]) == *currentGramRule) {
|
||||
if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) {
|
||||
std::cout << (*stateTotal)[k]->toString() << std::endl;
|
||||
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
|
||||
isAlreadyInState = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -112,6 +112,7 @@ void RNGLRParser::reducer(int i) {
|
||||
gss.addEdge(toStateNode, currentReached);
|
||||
if (reduction.second != 0) {
|
||||
//Do all non null reduction
|
||||
std::cout << "Checking for non-null reductions in states that already existed" << std::endl;
|
||||
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
|
||||
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++)
|
||||
if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0)
|
||||
@@ -123,9 +124,10 @@ void RNGLRParser::reducer(int i) {
|
||||
gss.addToFrontier(i, toStateNode);
|
||||
gss.addEdge(toStateNode, currentReached);
|
||||
|
||||
std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
|
||||
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
|
||||
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
|
||||
//Shift
|
||||
std::cout << "Action is " << actions[k]->toString() << std::endl;
|
||||
if (actions[k]->action == ParseAction::SHIFT)
|
||||
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState));
|
||||
else if (actions[k]->action == ParseAction::REDUCE && actions[k]->reduceRule->getRightSize() != 0)
|
||||
@@ -175,6 +177,48 @@ void RNGLRParser::shifter(int i) {
|
||||
}
|
||||
}
|
||||
|
||||
// Attaches a sequence of children to a parse-tree node, introducing packed
// nodes when the parent ends up with more than one alternative child list.
//
// parent:             node receiving the children.
// children:           the child sequence to attach. NOTE: this list is
//                     modified in place when a nullable part is appended.
// nullablePartsIndex: index into nullableParts of a node to append to
//                     `children` first; 0 means "append nothing".
//                     The index is not range-checked here.
//
// Behaviour:
//  - If belongsToFamily() reports the sequence is already present, do nothing.
//  - If the parent has no children yet, the sequence becomes its children.
//  - Otherwise the parent is ambiguous: its current children are first moved
//    under a new packed node (unless they are already all packed), and the new
//    sequence is added under another new packed node.
void RNGLRParser::addChildren(NodeTree<Symbol*>* parent, std::vector<NodeTree<Symbol*>*>* children, int nullablePartsIndex) {
    if (nullablePartsIndex != 0)
        children->push_back(nullableParts[nullablePartsIndex]);

    if (belongsToFamily(parent, children))
        return;

    // getChildren() returns a copy; keep it in a named local so its address
    // can legally be handed to the pointer-taking helpers below.
    std::vector<NodeTree<Symbol*>*> existingChildren = parent->getChildren();

    if (existingChildren.size() == 0) {
        parent->addChildren(children);
        return;
    }

    if (!arePacked(&existingChildren)) {
        // First ambiguity on this parent: wrap the current children in a
        // packed node so that each alternative lives under its own packed node.
        NodeTree<Symbol*>* subParent = new NodeTree<Symbol*>();
        setPacked(subParent, true);
        subParent->addChildren(&existingChildren);
        parent->clearChildren();
        parent->addChild(subParent);
    }

    // The new alternative gets its own packed node.
    NodeTree<Symbol*>* t = new NodeTree<Symbol*>();
    setPacked(t, true);
    parent->addChild(t);
    t->addChildren(children);
}
|
||||
|
||||
// Reports whether the child sequence `nodes` is already recorded under `node`.
//
// A sequence counts as present when either
//  - node's own child list is exactly `nodes`, or
//  - one of node's children is a packed node whose child list is exactly
//    `nodes` (the layout RNGLRParser::addChildren builds for ambiguous nodes).
// Lists are compared element-wise by node pointer, not by node contents.
//
// NOTE(review): the original body was an empty stub with no return statement
// (undefined behaviour); this implements the check as used by addChildren -
// confirm it matches the intended family definition.
bool RNGLRParser::belongsToFamily(NodeTree<Symbol*>* node, std::vector<NodeTree<Symbol*>*>* nodes) {
    std::vector<NodeTree<Symbol*>*> ownChildren = node->getChildren();
    if (ownChildren == *nodes)
        return true;

    for (std::vector<NodeTree<Symbol*>*>::size_type i = 0; i < ownChildren.size(); i++) {
        if (isPacked(ownChildren[i]) && ownChildren[i]->getChildren() == *nodes)
            return true;
    }
    return false;
}
|
||||
|
||||
// Reports whether every node in the list is marked as packed.
//
// Returns true for an empty list (there is no unpacked node in it).
bool RNGLRParser::arePacked(std::vector<NodeTree<Symbol*>*>* nodes) {
    for (std::vector<NodeTree<Symbol*>*>::size_type i = 0; i < nodes->size(); i++) {
        // Look up the i-th node of the list (the original referenced an
        // undeclared `node` here).
        if (!isPacked((*nodes)[i]))
            return false;
    }
    return true;
}
|
||||
|
||||
// Reports whether the given node has been marked as packed via setPacked().
//
// Nodes that were never registered are reported as not packed. The lookup
// uses find() rather than operator[] so that merely querying a node does not
// insert a new entry into packedMap.
bool RNGLRParser::isPacked(NodeTree<Symbol*>* node) {
    std::map<NodeTree<Symbol*>*, bool>::const_iterator entry = packedMap.find(node);
    return entry != packedMap.end() && entry->second;
}
|
||||
|
||||
// Stores the packed flag for a node in packedMap, creating the entry if the
// node was not registered yet and overwriting it otherwise.
// (The parameter isPacked shadows the member function of the same name.)
void RNGLRParser::setPacked(NodeTree<Symbol*>* node, bool isPacked) {
    bool& packedFlag = packedMap[node];
    packedFlag = isPacked;
}
|
||||
|
||||
|
||||
//Have to use own add states function in order to construct RN table instead of LALR table
|
||||
void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) {
|
||||
std::vector< State* > newStates;
|
||||
|
||||
@@ -243,9 +243,9 @@ int RegEx::longMatch(std::string stringToMatch) {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||
if (currentStates[j]->isGoal()) {
|
||||
lastMatch = i-1;
|
||||
std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl;
|
||||
//std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl;
|
||||
} else {
|
||||
std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <<std::endl;
|
||||
//std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <<std::endl;
|
||||
}
|
||||
std::vector<RegExState*>* addStates = currentStates[j]->advance(stringToMatch.at(i));
|
||||
nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());
|
||||
|
||||
Reference in New Issue
Block a user