Fixes for state generation to reduce memory usage - adding in optional semicolons ballooned our memory usage to somewhere under 8 gigs; with some simple refactoring we're back down to a bit over 4. It needs to be smaller, but it's an improvement.

This commit is contained in:
Nathan Braswell
2015-03-23 14:35:28 -04:00
parent f8e82b5302
commit 2c4dbc60d1
11 changed files with 89 additions and 100 deletions

View File

@@ -12,9 +12,15 @@
#include <iostream>
class ParseRule {
private:
int pointerIndex;
Symbol leftHandle;
std::vector<Symbol> lookahead;
std::vector<Symbol> rightSide;
public:
ParseRule();
ParseRule(Symbol leftHandle, int pointerIndex, std::vector<Symbol> &rightSide, std::vector<Symbol>* lookahead);
ParseRule(Symbol leftHandle, int pointerIndex, std::vector<Symbol> &rightSide, std::vector<Symbol> lookahead);
~ParseRule();
const bool equalsExceptLookahead(const ParseRule &other) const;
bool const operator==(const ParseRule &other) const;
@@ -36,19 +42,12 @@ class ParseRule {
bool advancePointer();
bool isAtEnd();
void setLookahead(std::vector<Symbol>* lookahead);
void addLookahead(std::vector<Symbol>* lookahead);
std::vector<Symbol>* getLookahead();
void setLookahead(std::vector<Symbol> lookahead);
void addLookahead(std::vector<Symbol> lookahead);
std::vector<Symbol> getLookahead();
std::string toString(bool printLookahead = true);
std::string toDOT();
private:
int pointerIndex;
Symbol leftHandle;
std::vector<Symbol>* lookahead;
std::vector<Symbol> rightSide;
};
#endif

View File

@@ -44,7 +44,7 @@ class Parser {
std::map<Symbol, std::vector<Symbol>> tokenFirstSet;
std::map<Symbol, bool> tokenNullable;
std::vector<Symbol>* incrementiveFollowSet(ParseRule* rule);
std::vector<Symbol> incrementiveFollowSet(ParseRule* rule);
virtual void closure(State* state);
virtual void addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo);
int stateNum(State* state);

View File

@@ -24,7 +24,7 @@ class State {
bool const operator!=(const State &other);
std::vector<ParseRule*>* getBasis();
std::vector<ParseRule*>* getRemaining();
std::vector<ParseRule*>* getTotal();
std::vector<ParseRule*> getTotal();
bool containsRule(ParseRule* rule);
void addRuleCombineLookahead(ParseRule* rule);
std::string toString();
@@ -40,8 +40,7 @@ class State {
std::vector<ParseRule*> remaining;
private:
std::vector<State*> parents;
std::vector<ParseRule*> total;
int number;
};
#endif
#endif

View File

@@ -21,7 +21,7 @@ path_part = forward_slash alphanumeric | back_slash alphanumeric ;
forward_slash = "/" ;
back_slash = "\\" ;
# all for optional semicolons
# all for optional semicolons
line_break = "
+" ;
actual_white = "( | )+" | line_break | line_break actual_white | "( | )+" actual_white ;

View File

@@ -136,7 +136,7 @@ int main(int argc, char* argv[]) {
std::cerr << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl;
return -1;
}
delete binaryTablePointer;
delete [] binaryTablePointer;
}
if (!compGramGood) {

View File

@@ -2,10 +2,9 @@
ParseRule::ParseRule() {
pointerIndex = 0;
lookahead = NULL;
}
ParseRule::ParseRule(Symbol leftHandle, int pointerIndex, std::vector<Symbol> &rightSide, std::vector<Symbol>* lookahead) {
ParseRule::ParseRule(Symbol leftHandle, int pointerIndex, std::vector<Symbol> &rightSide, std::vector<Symbol> lookahead) {
this->leftHandle = leftHandle;
this->pointerIndex = pointerIndex;
this->rightSide = rightSide;
@@ -21,7 +20,7 @@ const bool ParseRule::equalsExceptLookahead(const ParseRule &other) const {
}
const bool ParseRule::operator==(const ParseRule &other) const {
return(equalsExceptLookahead(other) && (lookahead == NULL ? other.lookahead == NULL : (*lookahead) == *(other.lookahead)));
return(equalsExceptLookahead(other) && (lookahead == other.lookahead));
}
const bool ParseRule::operator!=(const ParseRule &other) const {
@@ -35,22 +34,13 @@ const bool ParseRule::operator<(const ParseRule &other) const {
if (rightSide != other.rightSide)
return rightSide < other.rightSide;
if (lookahead != other.lookahead) {
if (! (lookahead && other.lookahead)) {
return lookahead < other.lookahead;
} else {
return *lookahead < *(other.lookahead);
}
return lookahead < other.lookahead;
}
return false;
}
ParseRule* ParseRule::clone() {
std::vector<Symbol>* newLookahead = NULL;
if (lookahead) {
newLookahead = new std::vector<Symbol>();
*newLookahead = *lookahead;
}
return( new ParseRule(leftHandle, pointerIndex, rightSide, newLookahead) );
return( new ParseRule(leftHandle, pointerIndex, rightSide, lookahead) );
}
void ParseRule::setLeftHandle(Symbol leftHandle) {
@@ -105,25 +95,25 @@ bool ParseRule::isAtEnd() {
return pointerIndex == rightSide.size();
}
void ParseRule::setLookahead(std::vector<Symbol>* lookahead) {
void ParseRule::setLookahead(std::vector<Symbol> lookahead) {
this->lookahead = lookahead;
}
void ParseRule::addLookahead(std::vector<Symbol>* lookahead) {
for (std::vector<Symbol>::size_type i = 0; i < lookahead->size(); i++) {
void ParseRule::addLookahead(std::vector<Symbol> lookahead) {
for (std::vector<Symbol>::size_type i = 0; i < lookahead.size(); i++) {
bool alreadyIn = false;
for (std::vector<Symbol>::size_type j = 0; j < this->lookahead->size(); j++) {
if ((*lookahead)[i] == (*(this->lookahead))[j]) {
for (std::vector<Symbol>::size_type j = 0; j < this->lookahead.size(); j++) {
if (lookahead[i] == this->lookahead[j]) {
alreadyIn = true;
break;
}
}
if (!alreadyIn)
this->lookahead->push_back((*lookahead)[i]);
this->lookahead.push_back(lookahead[i]);
}
}
std::vector<Symbol>* ParseRule::getLookahead() {
std::vector<Symbol> ParseRule::getLookahead() {
return lookahead;
}
@@ -136,10 +126,10 @@ std::string ParseRule::toString(bool printLookahead) {
}
if (pointerIndex >= rightSide.size())
concat += "(*)";
if (printLookahead && lookahead != NULL) {
if (printLookahead && lookahead.size()) {
concat += "**";
for (std::vector<Symbol>::size_type i = 0; i < lookahead->size(); i++)
concat += (*lookahead)[i].toString();
for (std::vector<Symbol>::size_type i = 0; i < lookahead.size(); i++)
concat += lookahead[i].toString();
concat += "**";
}
return(concat);

View File

@@ -102,20 +102,20 @@ void Parser::createStateSet() {
//Set the first state's basis to be the goal rule with lookahead EOF
ParseRule* goalRule = loadedGrammer[0]->clone();
std::vector<Symbol>* goalRuleLookahead = new std::vector<Symbol>();
goalRuleLookahead->push_back(EOFSymbol);
std::vector<Symbol> goalRuleLookahead;
goalRuleLookahead.push_back(EOFSymbol);
goalRule->setLookahead(goalRuleLookahead);
State* zeroState = new State(0, goalRule);
stateSets.push_back(zeroState);
std::queue<State*>* toDo = new std::queue<State*>();
toDo->push(zeroState);
std::queue<State*> toDo;
toDo.push(zeroState);
//std::cout << "Begining for main set for loop" << std::endl;
while (toDo->size()) {
while (toDo.size()) {
//closure
closure(toDo->front());
closure(toDo.front());
//Add the new states
addStates(&stateSets, toDo->front(), toDo);
toDo->pop();
addStates(&stateSets, toDo.front(), &toDo);
toDo.pop();
}
table.remove(1, EOFSymbol);
}
@@ -210,13 +210,13 @@ bool Parser::isNullableHelper(Symbol token, std::set<Symbol> done) {
}
//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
std::vector<Symbol>* Parser::incrementiveFollowSet(ParseRule* rule) {
std::vector<Symbol> Parser::incrementiveFollowSet(ParseRule* rule) {
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
rule = rule->clone();
rule->advancePointer();
//Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one
std::vector<Symbol>* followSet = new std::vector<Symbol>();
std::vector<Symbol> followSet;
std::vector<Symbol> symbolFirstSet;
bool symbolFirstSetHasNull = true;
while (symbolFirstSetHasNull && !rule->isAtEnd()) {
@@ -229,34 +229,34 @@ std::vector<Symbol>* Parser::incrementiveFollowSet(ParseRule* rule) {
break;
}
}
followSet->insert(followSet->end(), symbolFirstSet.begin(), symbolFirstSet.end());
followSet.insert(followSet.end(), symbolFirstSet.begin(), symbolFirstSet.end());
rule->advancePointer();
}
if (rule->isAtEnd()) {
symbolFirstSet = *(rule->getLookahead());
followSet->insert(followSet->end(), symbolFirstSet.begin(), symbolFirstSet.end());
symbolFirstSet = rule->getLookahead();
followSet.insert(followSet.end(), symbolFirstSet.begin(), symbolFirstSet.end());
}
std::vector<Symbol>* followSetReturn = new std::vector<Symbol>();
for (std::vector<Symbol>::size_type i = 0; i < followSet->size(); i++) {
std::vector<Symbol> followSetReturn;
for (std::vector<Symbol>::size_type i = 0; i < followSet.size(); i++) {
bool alreadyIn = false;
for (std::vector<Symbol>::size_type j = 0; j < followSetReturn->size(); j++)
if ((*followSet)[i] == (*followSetReturn)[j]) {
for (std::vector<Symbol>::size_type j = 0; j < followSetReturn.size(); j++)
if (followSet[i] == followSetReturn[j]) {
alreadyIn = true;
break;
}
if (!alreadyIn)
followSetReturn->push_back((*followSet)[i]);
followSetReturn.push_back(followSet[i]);
}
delete followSet;
delete rule;
return followSetReturn;
}
void Parser::closure(State* state) {
//Add all the applicable rules.
//std::cout << "Closure on " << state->toString() << " is" << std::endl;
std::vector<ParseRule*>* stateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < stateTotal->size(); i++) {
ParseRule* currentStateRule = (*stateTotal)[i];
std::vector<ParseRule*> stateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < stateTotal.size(); i++) {
ParseRule* currentStateRule = stateTotal[i];
//If it's at it's end, move on. We can't advance it.
if(currentStateRule->isAtEnd())
continue;
@@ -271,10 +271,10 @@ void Parser::closure(State* state) {
//Check to make sure not already in
bool isAlreadyInState = false;
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal->size(); k++) {
if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) {
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal.size(); k++) {
if (stateTotal[k]->equalsExceptLookahead(*currentGramRule)) {
//std::cout << (*stateTotal)[k]->toString() << std::endl;
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
stateTotal[k]->addLookahead(currentGramRule->getLookahead());
isAlreadyInState = true;
delete currentGramRule;
break;
@@ -294,10 +294,10 @@ void Parser::closure(State* state) {
void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo) {
std::vector< State* > newStates;
//For each rule in the state we already have
std::vector<ParseRule*>* currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
std::vector<ParseRule*> currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal.size(); i++) {
//Clone the current rule
ParseRule* advancedRule = (*currStateTotal)[i]->clone();
ParseRule* advancedRule = currStateTotal[i]->clone();
//Try to advance the pointer, if sucessful see if it is the correct next symbol
if (advancedRule->advancePointer()) {
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
@@ -324,16 +324,16 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol>* lookahead = (*currStateTotal)[i]->getLookahead();
if ((*currStateTotal)[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++)
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
} else if ((*currStateTotal)[i]->getAtNextIndex() == nullSymbol) {
std::vector<Symbol> lookahead = currStateTotal[i]->getLookahead();
if (currStateTotal[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead.size(); j++)
table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i]));
} else if (currStateTotal[i]->getAtNextIndex() == nullSymbol) {
//If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side of length 0. (so we don't pop off stack)
ParseRule* nullRule = (*currStateTotal)[i]->clone();
nullRule->setRightSide(* new std::vector<Symbol>());
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++)
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule));
ParseRule* nullRule = currStateTotal[i]->clone();
nullRule->setRightSide(std::vector<Symbol>());
for (std::vector<Symbol>::size_type j = 0; j < lookahead.size(); j++)
table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, nullRule));
}
}
//Put all our new states in the set of states only if they're not already there.

View File

@@ -334,10 +334,10 @@ void RNGLRParser::setPacked(NodeTree<Symbol>* node, bool isPacked) {
void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo) {
std::vector< State* > newStates;
//For each rule in the state we already have
std::vector<ParseRule*>* currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
std::vector<ParseRule*> currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal.size(); i++) {
//Clone the current rule
ParseRule* advancedRule = (*currStateTotal)[i]->clone();
ParseRule* advancedRule = currStateTotal[i]->clone();
//Try to advance the pointer, if sucessful see if it is the correct next symbol
if (advancedRule->advancePointer()) {
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
@@ -397,24 +397,24 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
void RNGLRParser::addStateReductionsToTable(State* state) {
std::vector<ParseRule*>* currStateTotal = state->getTotal();
std::vector<ParseRule*> currStateTotal = state->getTotal();
//std::cout << currStateTotal->size() << "::" << state->getNumber() << std::endl;
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal.size(); i++) {
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol>* lookahead = (*currStateTotal)[i]->getLookahead();
if ((*currStateTotal)[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++) {
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
std::vector<Symbol> lookahead = currStateTotal[i]->getLookahead();
if (currStateTotal[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead.size(); j++) {
table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i]));
}
//If this has an appropriate ruduction to null, get the reduce trees out
} else if (reducesToNull((*currStateTotal)[i])) {
} else if (reducesToNull(currStateTotal[i])) {
//std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl;
//It used to be that if is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side that is equal to
//the part that we've already gone through in the rule. (so we don't pop extra off stack)
//Now we use the same rule and make sure that the index location is used
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++)
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
for (std::vector<Symbol>::size_type j = 0; j < lookahead.size(); j++)
table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i]));
}
}
}

View File

@@ -25,10 +25,10 @@ const bool State::operator==(const State &other) {
return false;
}
if (remaining.size() != other.remaining.size())
if (remaining.size() != other.remaining.size())
return false;
for (std::vector< ParseRule* >::size_type i = 0; i < remaining.size(); i++) {
if ( *(remaining[i]) != *(other.remaining[i]) )
if ( *(remaining[i]) != *(other.remaining[i]) )
return false;
}
return true;
@@ -77,12 +77,11 @@ void State::combineStates(State &other) {
addParents(other.getParents());
}
std::vector<ParseRule*>* State::getTotal() {
total.clear();
//std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl;
std::vector<ParseRule*> State::getTotal() {
std::vector<ParseRule*> total;
total.insert(total.begin(), basis.begin(), basis.end());
total.insert(total.end(), remaining.begin(), remaining.end());
return(&total);
return total;
}
std::vector<ParseRule*>* State::getBasis() {
return &basis;
@@ -92,7 +91,7 @@ std::vector<ParseRule*>* State::getRemaining() {
}
bool State::containsRule(ParseRule* rule) {
getTotal();
auto total = getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < total.size(); i++) {
if (*rule == *(total[i])) {
return true;
@@ -102,7 +101,7 @@ bool State::containsRule(ParseRule* rule) {
}
void State::addRuleCombineLookahead(ParseRule* rule) {
getTotal();
auto total = getTotal();
bool alreadyIn = false;
for (std::vector<ParseRule*>::size_type i = 0; i < total.size(); i++) {
if (rule->equalsExceptLookahead(*(total[i]))) {
@@ -162,4 +161,4 @@ std::vector<State*>* State::getDeepParents(int depth) {
int State::getNumber() {
return number;
}
}

View File

@@ -40,6 +40,8 @@ std::string StringReader::line(bool truncateEnd)
std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd)
{
if (str_pos >= rd_string.size())
return "";
size_t found_pos = rd_string.find_first_of(stop_chars, str_pos);
if (rd_string[str_pos] == '\"') {

View File

@@ -190,7 +190,7 @@ void Table::importTable(char* tableData) {
tableData += sizeof(bool);
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
}
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL);
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, std::vector<Symbol>());
}
int shiftState = *((int*)tableData);
tableData += sizeof(int);