Added support for null rules, works well.

2013-07-09 02:45:59 -04:00
parent 4c2fd967f0
commit 1c85e3693e
5 changed files with 90 additions and 38 deletions
--- a/include/ParseRule.h
+++ b/include/ParseRule.h
@@ -26,6 +26,7 @@ class ParseRule {
 		void appendToRight(Symbol* appendee);

 		Symbol* getLeftSide();
+		void setRightSide(std::vector<Symbol*> &rightSide);
 		std::vector<Symbol*> getRightSide();
 		Symbol* getAtNextIndex();
 		Symbol* getAtIndex();
--- a/include/Parser.h
+++ b/include/Parser.h
@@ -47,6 +47,11 @@ class Parser {

 		std::vector< State* > stateSets;

+		//The EOFSymbol, a pointer because of use in table, etc
+		Symbol* EOFSymbol;
+		//The nullSymbol, ditto with above. Also used in comparisons
+		Symbol* nullSymbol;
+
 		std::vector< std::vector<ParseAction*>* >  table;
 		std::vector<Symbol*> symbolIndexVec;

--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -26,7 +26,7 @@ Symbol* Lexer::next() {
 	std::cout << "Current at is " << input.substr(currentPosition,input.length()-1) << " currentPos is " << currentPosition <<std::endl;
 	//If we're at the end, return an eof
 	if (currentPosition == input.length()-1)
-		return new Symbol("$EOF$", false);
+		return new Symbol("$EOF$", true);
 	int longestMatch = -1;
 	RegEx* longestRegEx = NULL;
 	std::string remainingString = input.substr(currentPosition,input.length()-1);
--- a/src/ParseRule.cpp
+++ b/src/ParseRule.cpp
@@ -45,6 +45,10 @@ Symbol* ParseRule::getLeftSide() {
 	return leftHandle;
 }

+void ParseRule::setRightSide(std::vector<Symbol*> &rightSide) { //Replace this rule's right side with a copy of the given symbol list
+	this->rightSide = rightSide; //this-> is needed because the parameter shadows the member of the same name
+}
+
 std::vector<Symbol*> ParseRule::getRightSide() {
 	return rightSide;
 }
--- a/src/Parser.cpp
+++ b/src/Parser.cpp
@@ -1,11 +1,13 @@
 #include "Parser.h"

 Parser::Parser() {
-
+	EOFSymbol = new Symbol("$EOF$", true); //End-of-input marker; allocated here and deleted in ~Parser()
+	nullSymbol = new Symbol("$NULL$", true); //Placed on the right side of null rules by loadGrammer; allocated here and deleted in ~Parser()
 }

 Parser::~Parser() {
-
+	delete EOFSymbol; //NOTE(review): this pointer is also stored in symbolIndexVec and in the goal rule's lookahead - confirm neither is used or freed after this point
+	delete nullSymbol; //NOTE(review): this pointer is also appended to the right side of null rules in loadGrammer - confirm those rules do not outlive the Parser
 }

 Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
@@ -45,6 +47,10 @@ void Parser::loadGrammer(std::string grammerInputString) {
 			currToken = reader.word();
 			//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
 			if (currToken == "|") {
+				//If we haven't added anything, that means that this is a null rule
+				if (currentRule->getRightSide().size() == 0)
+					currentRule->appendToRight(nullSymbol);
+
 				loadedGrammer.push_back(currentRule);
 				currentRule = new ParseRule();
 				currentRule->setLeftHandle(leftSide);
@@ -52,11 +58,18 @@ void Parser::loadGrammer(std::string grammerInputString) {
 			}
 		}
 		//Add new rule to grammer
+		//If we haven't added anything, that means that this is a null rule
+		if (currentRule->getRightSide().size() == 0)
+			currentRule->appendToRight(nullSymbol);
+
 		loadedGrammer.push_back(currentRule);
 		//Get next token
 		currToken = reader.word();
 	}
 	std::cout << "Parsed!\n";
+
+	for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
+		std::cout << loadedGrammer[i]->toString() << std::endl;
 }

 std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
@@ -67,20 +80,39 @@ std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
 		return(first);
 	}
 	//Otherwise....
-	//Ok, to make a first set, go through the grammer, if the token is part of the left side, add it's production's first token's first set.
-	//Theoretically, if that one includes mull, do the next one too. However, null productions have not yet been implemented.
+	//To make a first set, go through the grammar: for every rule whose left side is this token, add the first set of the rule's first right-side token.
+	//If that first set includes null, also add the first set of the next right-side token (if it exists), and so on.
 	Symbol* rightToken = NULL;
 	std::vector<Symbol*>* recursiveFirstSet = NULL;
 	for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
 		if (*token == *(loadedGrammer[i]->getLeftSide())) {
-			rightToken = loadedGrammer[i]->getRightSide()[0]; //Get the first token of the right side of this rule
-			if (rightToken->isTerminal())
-				first->push_back(rightToken);
-			else {
-				//Add the entire set
-				recursiveFirstSet = firstSet(rightToken);
+			//getRightSide() returns the vector by value, so copy it once per rule rather than on every access
+			const std::vector<Symbol*> rightSide = loadedGrammer[i]->getRightSide();
+			//Walk the rule left to right, adding each token's first set for as long as the previous one contained NULL
+			bool recFirstSetHasNull = true;
+			for (std::vector<Symbol*>::size_type j = 0; recFirstSetHasNull && j < rightSide.size(); j++) {
+				rightToken = rightSide[j]; //Get token of the right side of this rule
+				if (rightToken->isTerminal()) {
+					recursiveFirstSet = new std::vector<Symbol*>();
+					recursiveFirstSet->push_back(rightToken);
+				} else {
+					//Add the entire set
+					//NOTE(review): a rule whose right side starts with its own left side would recurse without end here, unless the part of this function not shown guards against it - confirm
+					recursiveFirstSet = firstSet(rightToken);
+				}
 				first->insert(first->end(), recursiveFirstSet->begin(), recursiveFirstSet->end());
-			}
+				//Check to see if the current recursiveFirstSet contains NULL, if so, then go through again with the next token. (if there is one)
+				recFirstSetHasNull = false;
+				for (std::vector<Symbol*>::size_type k = 0; k < recursiveFirstSet->size(); k++) {
+					//Index with k (the scan position in the set), not j (the position in the rule)
+					if ((*(*recursiveFirstSet)[k]) == *nullSymbol) {
+						recFirstSetHasNull = true;
+						break;
+					}
+				}
+				//The temporary set has been copied into first; free it, as incrementiveFollowSet does with firstSet results
+				delete recursiveFirstSet;
+			}
 		}
 	}
 	return(first);
@@ -97,18 +124,6 @@ void Parser::printFirstSets() {
 	}
 }

-
-//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
-std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
-	//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
-	rule = rule->clone();
-	rule->advancePointer();
-	if (rule->isAtEnd())
-		return rule->getLookahead();
-	return firstSet(rule->getAtNextIndex());
-
-}
-
 void Parser::createStateSet() {
 	std::cout << "Begining creation of stateSet" << std::endl;
 	//First state has no parents
@@ -116,7 +131,7 @@ void Parser::createStateSet() {
 	//Set the first state's basis to be the goal rule with lookahead EOF
 	ParseRule* goalRule = loadedGrammer[0]->clone();
 	std::vector<Symbol*>* goalRuleLookahead = new std::vector<Symbol*>();
-	goalRuleLookahead->push_back(new Symbol("$EOF$", false));
+	goalRuleLookahead->push_back(EOFSymbol);
 	goalRule->setLookahead(goalRuleLookahead);
 	stateSets.push_back( new State(0, goalRule));
 	//std::cout << "Begining for main set for loop" << std::endl;
@@ -128,6 +143,40 @@ void Parser::createStateSet() {
 	}
 }

+//Return the correct lookahead for the Symbol just after the pointer in the given rule.
+//The result is the union of the first sets of the Symbols that follow it, taken one after another for as long as each first set contains nullSymbol;
+//the rule's own lookahead is appended only when everything that follows can produce null (or nothing follows at all). The returned vector is newly allocated.
+std::vector<Symbol*>* Parser::incrementiveFollowSet(ParseRule* rule) {
+	//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
+	//NOTE(review): this clone is never deleted; freeing it is only safe if ParseRule does not delete its lookahead vector on destruction - confirm before adding a delete
+	rule = rule->clone();
+	rule->advancePointer();
+
+	//Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one
+	std::vector<Symbol*>* followSet = new std::vector<Symbol*>();
+	std::vector<Symbol*>* symbolFirstSet;
+	bool symbolFirstSetHasNull = true;
+	while (symbolFirstSetHasNull && !rule->isAtEnd()) {
+		symbolFirstSetHasNull = false;
+		symbolFirstSet = firstSet(rule->getAtNextIndex());
+		for (std::vector<Symbol*>::size_type i = 0; i < symbolFirstSet->size(); i++) {
+			if (*((*symbolFirstSet)[i]) == *nullSymbol) {
+				symbolFirstSetHasNull = true;
+				break;
+			}
+		}
+		followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
+		delete symbolFirstSet;
+		rule->advancePointer();
+	}
+	//The flag is still true only if the loop ran off the end with every following Symbol nullable (or there were none). Testing isAtEnd() here would also fire after consuming a non-nullable last Symbol and wrongly merge in the rule's lookahead.
+	if (symbolFirstSetHasNull) {
+		symbolFirstSet = rule->getLookahead();
+		followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
+	}
+	return followSet;
+}
+
 void Parser::closure(State* state) {
 	//Add all the applicable rules.
 	//std::cout << "Closure on " << state->toString() << " is" << std::endl;
@@ -142,8 +187,6 @@ void Parser::closure(State* state) {
 				//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
 				currentGramRule->setLookahead(incrementiveFollowSet(currentStateRule));

-				std::vector<Symbol*>* gramRuleLookahead = currentGramRule->getLookahead();
-				//std::cout << "Current lookahead for  " << currentGramRule->toString() << std::endl;
 				//Check to make sure not already in
 				bool isAlreadyInState = false;
 				for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal->size(); k++) {
@@ -196,12 +239,18 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state) {
 		//Also add any completed rules as reduces in the action table
 		//See if reduce
 		//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
+		std::vector<Symbol*>* lookahead = (*currStateTotal)[i]->getLookahead();
 		if ((*currStateTotal)[i]->isAtEnd()) {
-			std::vector<Symbol*>* lookahead = (*currStateTotal)[i]->getLookahead();
 			for (std::vector<Symbol*>::size_type j = 0; j < lookahead->size(); j++)
 				addToTable(state, (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
-		} else {
-			//std::cout << (*currStateTotal)[i]->toString() << " is NOT at end" << std::endl;
+		} else if (*((*currStateTotal)[i]->getAtNextIndex()) == *nullSymbol) {
+			//If is a rule that produces only NULL, add in the appropriate reduction, but use a new rule with a right side of length 0. (so we don't pop off stack)
+			ParseRule* nullRule = (*currStateTotal)[i]->clone();
+			//setRightSide copies its argument, so pass a local empty vector instead of a heap-allocated one that would never be freed
+			std::vector<Symbol*> emptyRightSide;
+			nullRule->setRightSide(emptyRightSide);
+			for (std::vector<Symbol*>::size_type j = 0; j < lookahead->size(); j++)
+				addToTable(state, (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule));
 		}
 	}
 	//Put all our new states in the set of states only if they're not already there.
@@ -214,19 +261,14 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state) {
 			if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
 				stateAlreadyInAllStates = true;
 				//If it does exist, we should add it as the shift/goto in the action table
-				//std::cout << "State exists, is " << j << std::endl;
 				(*stateSets)[j]->addParents(newStates[i]->getParents());
 				addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
 				break;
-			}/* else {
-				std::cout << "State " << newStates[i]->toString() << " does not equal " << (*stateSets)[j]->toString() << std::endl;
-			}*/
+			}
 		}
 		if (!stateAlreadyInAllStates) {
-			stateSets->push_back(newStates[i]);
 			//If the state does not already exist, add it and add it as the shift/goto in the action table
-			//std::cout << "State does not exist" << std::endl;
-			//std::cout << "State is " << newStates[i]->toString() << std::endl;
+			stateSets->push_back(newStates[i]);
 			addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
 		}
 	}
@@ -244,7 +286,7 @@ void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* actio

 	//If this is the first time we're adding to the table, add the EOF character
 	if (symbolIndexVec.size() == 0)
-		symbolIndexVec.push_back(new Symbol("$EOF$", false));
+		symbolIndexVec.push_back(EOFSymbol);

 	//find what state num the from state is
 	int stateNum = -1;