Added support for null rules, works well.

This commit is contained in:
Nathan Braswell
2013-07-09 02:45:59 -04:00
parent 4c2fd967f0
commit 1c85e3693e
5 changed files with 90 additions and 38 deletions

View File

@@ -26,6 +26,7 @@ class ParseRule {
void appendToRight(Symbol* appendee);
Symbol* getLeftSide();
void setRightSide(std::vector<Symbol*> &rightSide);
std::vector<Symbol*> getRightSide();
Symbol* getAtNextIndex();
Symbol* getAtIndex();

View File

@@ -47,6 +47,11 @@ class Parser {
std::vector< State* > stateSets;
//The EOFSymbol, a pointer because of use in table, etc
Symbol* EOFSymbol;
//The nullSymbol, ditto with above. Also used in comparisons
Symbol* nullSymbol;
std::vector< std::vector<ParseAction*>* > table;
std::vector<Symbol*> symbolIndexVec;

View File

@@ -26,7 +26,7 @@ Symbol* Lexer::next() {
std::cout << "Current at is " << input.substr(currentPosition,input.length()-1) << " currentPos is " << currentPosition <<std::endl;
//If we're at the end, return an eof
if (currentPosition == input.length()-1)
return new Symbol("$EOF$", false);
return new Symbol("$EOF$", true);
int longestMatch = -1;
RegEx* longestRegEx = NULL;
std::string remainingString = input.substr(currentPosition,input.length()-1);

View File

@@ -45,6 +45,10 @@ Symbol* ParseRule::getLeftSide() {
return leftHandle;
}
void ParseRule::setRightSide(std::vector<Symbol*> &rightSide) {
this->rightSide = rightSide;
}
std::vector<Symbol*> ParseRule::getRightSide() {
return rightSide;
}

View File

@@ -1,11 +1,13 @@
#include "Parser.h"
Parser::Parser() {
EOFSymbol = new Symbol("$EOF$", true);
nullSymbol = new Symbol("$NULL$", true);
}
Parser::~Parser() {
delete EOFSymbol;
delete nullSymbol;
}
Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
@@ -45,6 +47,10 @@ void Parser::loadGrammer(std::string grammerInputString) {
currToken = reader.word();
//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
if (currToken == "|") {
//If we haven't added anything, that means that this is a null rule
if (currentRule->getRightSide().size() == 0)
currentRule->appendToRight(nullSymbol);
loadedGrammer.push_back(currentRule);
currentRule = new ParseRule();
currentRule->setLeftHandle(leftSide);
@@ -52,11 +58,18 @@ void Parser::loadGrammer(std::string grammerInputString) {
}
}
//Add new rule to grammer
//If we haven't added anything, that means that this is a null rule
if (currentRule->getRightSide().size() == 0)
currentRule->appendToRight(nullSymbol);
loadedGrammer.push_back(currentRule);
//Get next token
currToken = reader.word();
}
std::cout << "Parsed!\n";
for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
std::cout << loadedGrammer[i]->toString() << std::endl;
}
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
@@ -67,20 +80,34 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
return(first);
}
//Otherwise....
//Ok, to make a first set, go through the grammer, if the token is part of the left side, add it's production's first token's first set.
//Theoretically, if that one includes mull, do the next one too. However, null productions have not yet been implemented.
//Ok, to make a first set, go through the grammer, if the token it's left side, add it's production's first token's first set.
//If that one includes mull, do the next one too (if it exists).
Symbol* rightToken = NULL;
std::vector<Symbol*>* recursiveFirstSet = NULL;
for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
if (*token == *(loadedGrammer[i]->getLeftSide())) {
rightToken = loadedGrammer[i]->getRightSide()[0]; //Get the first token of the right side of this rule
if (rightToken->isTerminal())
first->push_back(rightToken);
else {
//Add the entire set
recursiveFirstSet = firstSet(rightToken);
//Loop through the rule adding first sets for each token if the previous token contained NULL
bool recFirstSetHasNull = false;
int j = 0;
do {
rightToken = loadedGrammer[i]->getRightSide()[j]; //Get token of the right side of this rule
if (rightToken->isTerminal()) {
recursiveFirstSet = new std::vector<Symbol*>();
recursiveFirstSet->push_back(rightToken);
} else {
//Add the entire set
recursiveFirstSet = firstSet(rightToken);
}
first->insert(first->end(), recursiveFirstSet->begin(), recursiveFirstSet->end());
}
//Check to see if the current recursiveFirstSet contains NULL, if so, then go through again with the next token. (if there is one)
recFirstSetHasNull = false;
for (std::vector<Symbol*>::size_type k = 0; k < recursiveFirstSet->size(); k++) {
if ((*(*recursiveFirstSet)[j]) == *nullSymbol) {
recFirstSetHasNull = true;
}
}
j++;
} while (recFirstSetHasNull && loadedGrammer[i]->getRightSide().size() > j);
}
}
return(first);
@@ -97,18 +124,6 @@ void Parser::printFirstSets() {
}
}
//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
rule = rule->clone();
rule->advancePointer();
if (rule->isAtEnd())
return rule->getLookahead();
return firstSet(rule->getAtNextIndex());
}
void Parser::createStateSet() {
std::cout << "Begining creation of stateSet" << std::endl;
//First state has no parents
@@ -116,7 +131,7 @@ void Parser::createStateSet() {
//Set the first state's basis to be the goal rule with lookahead EOF
ParseRule* goalRule = loadedGrammer[0]->clone();
std::vector<Symbol*>* goalRuleLookahead = new std::vector<Symbol*>();
goalRuleLookahead->push_back(new Symbol("$EOF$", false));
goalRuleLookahead->push_back(EOFSymbol);
goalRule->setLookahead(goalRuleLookahead);
stateSets.push_back( new State(0, goalRule));
//std::cout << "Begining for main set for loop" << std::endl;
@@ -128,6 +143,36 @@ void Parser::createStateSet() {
}
}
//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
rule = rule->clone();
rule->advancePointer();
//Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one
std::vector<Symbol*>* followSet = new std::vector<Symbol*>();
std::vector<Symbol*>* symbolFirstSet;
bool symbolFirstSetHasNull = true;
while (symbolFirstSetHasNull && !rule->isAtEnd()) {
symbolFirstSetHasNull = false;
symbolFirstSet = firstSet(rule->getAtNextIndex());
for (std::vector<Symbol*>::size_type i = 0; i < symbolFirstSet->size(); i++) {
if (*((*symbolFirstSet)[i]) == *nullSymbol) {
symbolFirstSetHasNull = true;
break;
}
}
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
delete symbolFirstSet;
rule->advancePointer();
}
if (rule->isAtEnd()) {
symbolFirstSet = rule->getLookahead();
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
}
return followSet;
}
void Parser::closure(State* state) {
//Add all the applicable rules.
//std::cout << "Closure on " << state->toString() << " is" << std::endl;
@@ -142,8 +187,6 @@ void Parser::closure(State* state) {
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
currentGramRule->setLookahead(incrementiveFollowSet(currentStateRule));
std::vector<Symbol*>* gramRuleLookahead = currentGramRule->getLookahead();
//std::cout << "Current lookahead for " << currentGramRule->toString() << std::endl;
//Check to make sure not already in
bool isAlreadyInState = false;
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal->size(); k++) {
@@ -196,12 +239,16 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state) {
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol*>* lookahead = (*currStateTotal)[i]->getLookahead();
if ((*currStateTotal)[i]->isAtEnd()) {
std::vector<Symbol*>* lookahead = (*currStateTotal)[i]->getLookahead();
for (std::vector<Symbol*>::size_type j = 0; j < lookahead->size(); j++)
addToTable(state, (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
} else {
//std::cout << (*currStateTotal)[i]->toString() << " is NOT at end" << std::endl;
} else if (*((*currStateTotal)[i]->getAtNextIndex()) == *nullSymbol) {
//If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side of length 0. (so we don't pop off stack)
ParseRule* nullRule = (*currStateTotal)[i]->clone();
nullRule->setRightSide(* new std::vector<Symbol*>());
for (std::vector<Symbol*>::size_type j = 0; j < lookahead->size(); j++)
addToTable(state, (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule));
}
}
//Put all our new states in the set of states only if they're not already there.
@@ -214,19 +261,14 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state) {
if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
//std::cout << "State exists, is " << j << std::endl;
(*stateSets)[j]->addParents(newStates[i]->getParents());
addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
}/* else {
std::cout << "State " << newStates[i]->toString() << " does not equal " << (*stateSets)[j]->toString() << std::endl;
}*/
}
}
if (!stateAlreadyInAllStates) {
stateSets->push_back(newStates[i]);
//If the state does not already exist, add it and add it as the shift/goto in the action table
//std::cout << "State does not exist" << std::endl;
//std::cout << "State is " << newStates[i]->toString() << std::endl;
stateSets->push_back(newStates[i]);
addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
}
}
@@ -244,7 +286,7 @@ void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* actio
//If this is the first time we're adding to the table, add the EOF character
if (symbolIndexVec.size() == 0)
symbolIndexVec.push_back(new Symbol("$EOF$", false));
symbolIndexVec.push_back(EOFSymbol);
//find what state num the from state is
int stateNum = -1;