In progress

This commit is contained in:
Nathan Braswell
2013-08-16 00:03:26 -04:00
parent d21f521266
commit 2eaf640855
10 changed files with 137 additions and 76 deletions

View File

@@ -23,6 +23,7 @@ class RNGLRParser: public Parser {
void addChildren(NodeTree<Symbol*>* parent, std::vector<NodeTree<Symbol*>*>* children, NodeTree<Symbol*>* nullableParts);
void addStates(std::vector< State* >* stateSets, State* state);
void addStateReductionsToTable(State* state);
bool fullyReducesToNull(ParseRule* rule);
bool reducesToNull(ParseRule* rule);
bool reducesToNull(ParseRule* rule, std::vector<Symbol*> avoidList);

View File

@@ -20,13 +20,16 @@ class State {
~State();
bool const operator==(const State &other);
bool const basisEquals(const State &other);
bool const basisEqualsExceptLookahead(const State &other);
bool const operator!=(const State &other);
std::vector<ParseRule*>* getBasis();
std::vector<ParseRule*>* getRemaining();
std::vector<ParseRule*>* getTotal();
bool containsRule(ParseRule* rule);
void addRuleCombineLookahead(ParseRule* rule);
std::string toString();
void combineStates(State &other);
void addParents(std::vector<State*>* parents);
std::vector<State*>* getParents();
std::vector<State*>* getDeepParents(int depth);

View File

@@ -48,29 +48,29 @@ int main(int argc, char* argv[]) {
RNGLRParser parser;
parser.loadGrammer(grammerInputFileString);
//std::cout << "Creating State Set from Main" << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nState Set" << std::endl;
std::cout << "\nState Set" << std::endl;
parser.createStateSet();
//std::cout << "finished State Set from Main" << std::endl;
//std::cout << "Doing stateSetToString from Main" << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
std::cout << parser.stateSetToString() << std::endl;
std::cout << "finished stateSetToString from Main" << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl;
std::cout << parser.tableToString() << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl;
std::cout << grammerInputFileString << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer toString" << std::endl;
std::cout << parser.grammerToString() << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl;
std::cout << parser.tableToString() << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl;
// std::cout << grammerInputFileString << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer toString" << std::endl;
// std::cout << parser.grammerToString() << std::endl;
//std::cout << parser.grammerToDOT() << std::endl;
//outFile << parser.grammerToDOT() << std::endl;
std::cout << "\n\n\n\n\n\n\n\n\n\nParsing" << std::endl;
std::cout << "\nParsing" << std::endl;
std::cout << programInputFileString << std::endl;
NodeTree<Symbol*>* parseTree = parser.parseInput(programInputFileString);
if (parseTree) {
std::cout << parseTree->DOTGraphString() << std::endl;
//std::cout << parseTree->DOTGraphString() << std::endl;
outFile << parseTree->DOTGraphString() << std::endl;
}

View File

@@ -15,10 +15,10 @@ NodeTree<int>* GraphStructuredStack::newNode(int stateNum) {
void GraphStructuredStack::addToFrontier(int frontier, NodeTree<int>* node) {
//First, make sure our vector has this and lesser frontiers. If not, add it and up to it
while (gss.size() <= frontier) {
std::cout << "Adding a new frontier: " << gss.size() << std::endl;
//std::cout << "Adding a new frontier: " << gss.size() << std::endl;
gss.push_back(new std::vector<NodeTree<int>*>());
}
std::cout << "Adding " << node << " (" << node->getData() << ") to frontier " << frontier << std::endl;
//std::cout << "Adding " << node << " (" << node->getData() << ") to frontier " << frontier << std::endl;
gss[frontier]->push_back(node);
}
@@ -63,7 +63,7 @@ std::vector<NodeTree<int>*>* GraphStructuredStack::getReachable(NodeTree<int>* s
NodeTree<int>* currentNode = currentNodes.front();
currentNodes.pop();
std::vector<NodeTree<int>*> children = currentNode->getChildren();
std::cout << currentNode->getData() << " has children ";
//std::cout << currentNode->getData() << " has children ";
for (std::vector<NodeTree<int>*>::size_type j = 0; j < children.size(); j++) {
std::cout << children[j]->getData() << " ";
nextNodes.push(children[j]);
@@ -77,7 +77,7 @@ std::vector<NodeTree<int>*>* GraphStructuredStack::getReachable(NodeTree<int>* s
}
while (!currentNodes.empty()) {
reachableList->push_back(currentNodes.front());
std::cout << currentNodes.front()->getData() << " is reachable from " << start->getData() << " by length " << length << std::endl;
//std::cout << currentNodes.front()->getData() << " is reachable from " << start->getData() << " by length " << length << std::endl;
currentNodes.pop();
}
return reachableList;

View File

@@ -23,7 +23,7 @@ void Lexer::addRegEx(std::string regExString) {
}
Symbol* Lexer::next() {
std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <<std::endl;
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <<std::endl;
//If we're at the end, return an eof
if (currentPosition >= input.length()-1)
return new Symbol("$EOF$", true);
@@ -44,8 +44,8 @@ Symbol* Lexer::next() {
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
return new Symbol(longestRegEx->getPattern(), true, eatenString);
} else {
std::cout << "Found no applicable regex" << std::endl;
std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;
//std::cout << "Found no applicable regex" << std::endl;
//std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;
return NULL;
}
}

View File

@@ -74,8 +74,8 @@ void Parser::loadGrammer(std::string grammerInputString) {
}
std::cout << "Parsed!\n";
for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
std::cout << loadedGrammer[i]->toString() << std::endl;
// for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
// std::cout << loadedGrammer[i]->toString() << std::endl;
}
void Parser::createStateSet() {
@@ -222,7 +222,7 @@ void Parser::closure(State* state) {
bool isAlreadyInState = false;
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal->size(); k++) {
if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) {
std::cout << (*stateTotal)[k]->toString() << std::endl;
//std::cout << (*stateTotal)[k]->toString() << std::endl;
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
isAlreadyInState = true;
break;

View File

@@ -33,7 +33,7 @@ NodeTree<Symbol*>* RNGLRParser::parseInput(std::string inputString) {
Symbol* currentToken = lexer.next();
input.push_back(currentToken);
while (*currentToken != *EOFSymbol) {
std::cout << EOFSymbol->toString() << " " << currentToken->toString() << std::endl;
//std::cout << EOFSymbol->toString() << " " << currentToken->toString() << std::endl;
currentToken = lexer.next();
if (currentToken != NULL) {
input.push_back(currentToken);
@@ -43,12 +43,12 @@ NodeTree<Symbol*>* RNGLRParser::parseInput(std::string inputString) {
}
}
std::cout << "\n\n\nDone with Lexing\n\n\n" << std::endl;
std::cout << "\nDone with Lexing\n" << std::endl;
for (int i = 0; i < input.size(); i++)
std::cout << "|" << input[i]->toString() << "|";
std::cout << std::endl;
// for (int i = 0; i < input.size(); i++)
// std::cout << "|" << input[i]->toString() << "|";
// std::cout << std::endl;
std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
@@ -70,12 +70,12 @@ NodeTree<Symbol*>* RNGLRParser::parseInput(std::string inputString) {
}
}
std::cout << "GSS:\n" << gss.toString() << std::endl;
// std::cout << "GSS:\n" << gss.toString() << std::endl;
std::cout << "Starting parse loop" << std::endl;
for (int i = 0; i < input.size(); i++) {
std::cout << "Checking if frontier " << i << " is empty" << std::endl;
// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
if (gss.frontierIsEmpty(i)) {
std::cout << "Frontier " << i << " is empty." << std::endl;
std::cout << "Failed on " << input[i]->toString() << std::endl;
@@ -86,13 +86,13 @@ NodeTree<Symbol*>* RNGLRParser::parseInput(std::string inputString) {
SPPFStepNodes.clear();
while (toReduce.size() != 0) {
std::cout << "Reducing for " << i << std::endl;
//std::cout << "Reducing for " << i << std::endl;
//std::cout << "GSS:\n" << gss.toString() << std::endl;
reducer(i);
}
std::cout << "Shifting for " << i << std::endl;
// std::cout << "Shifting for " << i << std::endl;
shifter(i);
std::cout << "GSS:\n" << gss.toString() << std::endl;
//std::cout << "GSS:\n" << gss.toString() << std::endl;
}
std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
NodeTree<int>* accState = gss.frontierGetAccState(input.size()-1);
@@ -109,7 +109,7 @@ NodeTree<Symbol*>* RNGLRParser::parseInput(std::string inputString) {
void RNGLRParser::reducer(int i) {
Reduction reduction = toReduce.front();
toReduce.pop();
std::cout << "Doing reduction of length " << reduction.length << " from state " << reduction.from->getData() << " to symbol " << reduction.symbol->toString() << std::endl;
//std::cout << "Doing reduction of length " << reduction.length << " from state " << reduction.from->getData() << " to symbol " << reduction.symbol->toString() << std::endl;
int pathLength = reduction.length > 0 ? reduction.length -1 : 0;
//Get every reachable path
std::vector<std::vector<NodeTree<int>*> >* paths = gss.getReachablePaths(reduction.from, pathLength);
@@ -155,7 +155,7 @@ void RNGLRParser::reducer(int i) {
gss.addEdge(toStateNode, currentReached, newLabel);
if (reduction.length != 0) {
//Do all non null reduction
std::cout << "Checking for non-null reductions in states that already existed" << std::endl;
//std::cout << "Checking for non-null reductions in states that already existed" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
if (actions[k]->action == ParseAction::REDUCE && !fullyReducesToNull(actions[k]->reduceRule)) {
@@ -170,7 +170,7 @@ void RNGLRParser::reducer(int i) {
gss.addToFrontier(i, toStateNode);
gss.addEdge(toStateNode, currentReached, newLabel);
std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
//std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
std::cout << "Action is " << actions[k]->toString() << std::endl;
@@ -200,7 +200,7 @@ void RNGLRParser::shifter(int i) {
std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
NodeTree<int>* shiftTo = gss.inFrontier(i+1, shift.second);
if (shiftTo) {
std::cout << "State already existed, just adding edge" << std::endl;
//std::cout << "State already existed, just adding edge" << std::endl;
gss.addEdge(shiftTo, shift.first, newLabel);
std::vector<ParseAction*> actions = *(table.get(shift.second, input[i+1]));
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
@@ -210,7 +210,7 @@ void RNGLRParser::shifter(int i) {
}
}
} else {
std::cout << "State did not already exist, adding" << std::endl;
//std::cout << "State did not already exist, adding" << std::endl;
shiftTo = gss.newNode(shift.second);
gss.addToFrontier(i+1, shiftTo);
gss.addEdge(shiftTo, shift.first, newLabel);
@@ -259,7 +259,7 @@ void RNGLRParser::addChildren(NodeTree<Symbol*>* parent, std::vector<NodeTree<Sy
}
bool RNGLRParser::belongsToFamily(NodeTree<Symbol*>* node, std::vector<NodeTree<Symbol*>*>* nodes) {
std::cout << "Checking " << node->getData()->toString() << "'s family" << std::endl;
//std::cout << "Checking " << node->getData()->toString() << "'s family" << std::endl;
std::vector<NodeTree<Symbol*>*> children = node->getChildren();
for (std::vector<NodeTree<Symbol*>*>::size_type i = 0; i < nodes->size(); i++) {
bool containsOne = false;
@@ -312,9 +312,8 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) {
for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) {
if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) {
symbolAlreadyInState = true;
//So now check to see if this exact rule is in this state
if (!newStates[j]->containsRule(advancedRule))
newStates[j]->basis.push_back(advancedRule);
//Add rule to state, combining with an identical rule (except lookahead) if one exists
newStates[j]->addRuleCombineLookahead(advancedRule);
//We found a state with the same symbol, so stop searching
break;
}
@@ -324,7 +323,36 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) {
newStates.push_back(newState);
}
}
//Also add any completed rules as reduces in the action table
}
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false;
Symbol* currStateSymbol;
for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
stateAlreadyInAllStates = false;
currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
if (newStates[i]->basisEqualsExceptLookahead(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
(*stateSets)[j]->combineStates(*(newStates[i]));
addStateReductionsToTable((*stateSets)[j]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
}
}
if (!stateAlreadyInAllStates) {
//If the state does not already exist, add it and add it as the shift/goto in the action table
stateSets->push_back(newStates[i]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
}
}
//Also add any completed rules as reduces in the action table
addStateReductionsToTable(state);
}
void RNGLRParser::addStateReductionsToTable(State* state) {
std::vector<ParseRule*>* currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol*>* lookahead = (*currStateTotal)[i]->getLookahead();
@@ -333,7 +361,7 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) {
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
//If this has an appropriate reduction to null, get the reduce trees out
} else if (reducesToNull((*currStateTotal)[i])) {
std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl;
//std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl;
//If this is a rule that produces only NULL, add in the appropriate reduction, but use a new rule with a right side that is equal to
//the part that we've already gone through in the rule. (so we don't pop extra off stack)
//Now we use the same rule and make sure that the index location is used
@@ -346,27 +374,6 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state) {
//table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule));
}
}
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false;
Symbol* currStateSymbol;
for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
stateAlreadyInAllStates = false;
currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
(*stateSets)[j]->addParents(newStates[i]->getParents());
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
}
}
if (!stateAlreadyInAllStates) {
//If the state does not already exist, add it and add it as the shift/goto in the action table
stateSets->push_back(newStates[i]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
}
}
}
bool RNGLRParser::fullyReducesToNull(ParseRule* rule) {
@@ -421,8 +428,7 @@ NodeTree<Symbol*>* RNGLRParser::getNullableParts(ParseRule* rule) {
NodeTree<Symbol*>* RNGLRParser::getNullableParts(ParseRule* rule, std::vector<NodeTree<Symbol*>*> avoidList) {
if (reducesToNull(rule)) {
std::cout << "Reduces to null so adding parts " << rule->toString() << std::endl;
//return new NodeTree<Symbol*>("FAKE_PARTS_FOR_NO_CRASH", nullSymbol);
//std::cout << "Reduces to null so adding parts " << rule->toString() << std::endl;
Symbol* symbol = rule->getLeftSide();
NodeTree<Symbol*>* symbolNode = new NodeTree<Symbol*>(symbol->getName(), symbol);
if (*(rule->getAtNextIndex()) == *nullSymbol) {

View File

@@ -263,8 +263,8 @@ int RegEx::longMatch(std::string stringToMatch) {
if (!inCurrStates)
currentStates.push_back(nextStates[j]);
}
if (currentStates.size() != 0)
std::cout << "Matched " << i << " character: " << stringToMatch[i-1] << std::endl;
// if (currentStates.size() != 0)
// std::cout << "Matched " << i << " character: " << stringToMatch[i-1] << std::endl;
nextStates.clear();
//If we can't continue matching, just return our last matched

View File

@@ -44,12 +44,39 @@ const bool State::basisEquals(const State &other) {
return false;
for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) {
if (*(basis[i]) != *(other.basis[i]))
if (*(basis[i]) != (*(other.basis[i])))
return false;
}
return true;
}
const bool State::basisEqualsExceptLookahead(const State &other) {
//return (basis == other.basis && remaining == other.remaining);
if (basis.size() != other.basis.size())
return false;
for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) {
if (!basis[i]->equalsExceptLookahead(*(other.basis[i])))
return false;
}
return true;
}
//Folds other's basis and parents into this state.
//For each rule in other's basis:
//  - every rule already in our basis that equalsExceptLookahead() it is handed the incoming
//    rule's lookahead through addLookahead() (presumably a merge - ParseRule is not visible here)
//  - if no rule in our basis matched, the incoming rule pointer itself is appended to our basis
//    (the pointer is stored as-is, not copied)
//Afterwards other's parents are passed to addParents().
void State::combineStates(State &other) {
    typedef std::vector< ParseRule* >::size_type RuleIndex;
    for (RuleIndex incoming = 0; incoming < other.basis.size(); incoming++) {
        ParseRule* candidate = other.basis[incoming];
        bool merged = false;
        //Deliberately no early exit: every matching rule in our basis gets the lookahead
        for (RuleIndex existing = 0; existing < basis.size(); existing++) {
            if (!basis[existing]->equalsExceptLookahead(*candidate))
                continue;
            basis[existing]->addLookahead(candidate->getLookahead());
            merged = true;
        }
        //Nothing in our basis matched, so take the rule on as a new basis rule
        if (!merged)
            basis.push_back(candidate);
    }
    addParents(other.getParents());
}
std::vector<ParseRule*>* State::getTotal() {
total.clear();
for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) {
@@ -68,17 +95,28 @@ std::vector<ParseRule*>* State::getRemaining() {
}
bool State::containsRule(ParseRule* rule) {
for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) {
if (*rule == *(basis[i]))
return true;
}
for (std::vector<ParseRule*>::size_type i = 0; i < remaining.size(); i++) {
if (*rule == *(remaining[i]))
getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < total.size(); i++) {
if (*rule == *(total[i])) {
return true;
}
}
return false;
}
//Adds rule to this state, combining it with rules the state already holds where possible.
//getTotal() is called first to refresh the total vector. Every rule in total that rule
//equalsExceptLookahead() is handed rule's lookahead through addLookahead(). If nothing in
//total matched, the rule pointer is appended to the basis.
void State::addRuleCombineLookahead(ParseRule* rule) {
    //Refresh total before scanning it
    getTotal();
    bool merged = false;
    const std::vector<ParseRule*>::size_type totalCount = total.size();
    //No early exit: every matching rule in total receives the lookahead
    for (std::vector<ParseRule*>::size_type idx = 0; idx != totalCount; idx++) {
        ParseRule* existing = total[idx];
        if (!rule->equalsExceptLookahead(*existing))
            continue;
        existing->addLookahead(rule->getLookahead());
        merged = true;
    }
    if (merged)
        return;
    //No rule with the same shape was found, so this becomes a new basis rule
    basis.push_back(rule);
}
std::string State::toString() {
std::string concat = "";
concat += "State " + intToString(number) + " with " + intToString(parents.size()) + " parents:\n";

View File

@@ -67,7 +67,13 @@ void Table::add(int stateNum, Symbol* tranSymbol, ParseAction* action) {
//std::cout << "not Null!" << std::endl;
//std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << " on " << tranSymbol->toString() << std::endl;
(*(table[stateNum]))[symbolIndex]->push_back(action);
//Check to see if this action is already in the list
bool alreadyIn = false;
for (std::vector<ParseAction*>::size_type i = 0; i < (*(table[stateNum]))[symbolIndex]->size(); i++)
if (*((*((*(table[stateNum]))[symbolIndex]))[i]) == *action)
alreadyIn = true;
if (!alreadyIn)
(*(table[stateNum]))[symbolIndex]->push_back(action);
}
}
@@ -98,8 +104,15 @@ std::vector<ParseAction*>* Table::get(int state, Symbol* token) {
return NULL;
}
std::cout << "Get for state: " << state << ", and Symbol: " << token->toString() << std::endl;
//std::cout << "Get for state: " << state << ", and Symbol: " << token->toString() << std::endl;
if (state < 0 || state >= table.size()) {
std::cout << "State bad: " << state << std::endl;
return NULL;
}
if (symbolIndex < 0 || symbolIndex >= table[state]->size()) {
std::cout << "Symbol bad for this state: " << token->toString() << std::endl;
return NULL;
}
std::vector<ParseAction*>* action = (*(table[state]))[symbolIndex];
//This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec