From 2c4dbc60d1bf4e38a175006b6dc648d9f7da6a3f Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Mon, 23 Mar 2015 14:35:28 -0400 Subject: [PATCH] Fixes for state generation to reduce memory usage - adding in optional semicolons ballooned our memory usage to somewhere under 8 gigs; with some simple refactoring we're back down to a bit over 4. Needs to be smaller, but it's an improvement --- include/ParseRule.h | 21 ++++++------- include/Parser.h | 2 +- include/State.h | 5 ++- krakenGrammer.kgm | 2 +- main.cpp | 2 +- src/ParseRule.cpp | 38 +++++++++-------------- src/Parser.cpp | 74 ++++++++++++++++++++++---------------------- src/RNGLRParser.cpp | 24 +++++++------- src/State.cpp | 17 +++++----- src/StringReader.cpp | 2 ++ src/Table.cpp | 2 +- 11 files changed, 89 insertions(+), 100 deletions(-) diff --git a/include/ParseRule.h b/include/ParseRule.h index 94d602a..e81fc33 100644 --- a/include/ParseRule.h +++ b/include/ParseRule.h @@ -12,9 +12,15 @@ #include class ParseRule { + private: + int pointerIndex; + Symbol leftHandle; + std::vector lookahead; + std::vector rightSide; + public: ParseRule(); - ParseRule(Symbol leftHandle, int pointerIndex, std::vector &rightSide, std::vector* lookahead); + ParseRule(Symbol leftHandle, int pointerIndex, std::vector &rightSide, std::vector lookahead); ~ParseRule(); const bool equalsExceptLookahead(const ParseRule &other) const; bool const operator==(const ParseRule &other) const; @@ -36,19 +42,12 @@ class ParseRule { bool advancePointer(); bool isAtEnd(); - void setLookahead(std::vector* lookahead); - void addLookahead(std::vector* lookahead); - std::vector* getLookahead(); + void setLookahead(std::vector lookahead); + void addLookahead(std::vector lookahead); + std::vector getLookahead(); std::string toString(bool printLookahead = true); std::string toDOT(); - - private: - int pointerIndex; - Symbol leftHandle; - std::vector* lookahead; - std::vector rightSide; - }; #endif diff --git a/include/Parser.h b/include/Parser.h index
25daf02..cd9a9f8 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -44,7 +44,7 @@ class Parser { std::map> tokenFirstSet; std::map tokenNullable; - std::vector* incrementiveFollowSet(ParseRule* rule); + std::vector incrementiveFollowSet(ParseRule* rule); virtual void closure(State* state); virtual void addStates(std::vector< State* >* stateSets, State* state, std::queue* toDo); int stateNum(State* state); diff --git a/include/State.h b/include/State.h index bb2d08e..d07d1a8 100644 --- a/include/State.h +++ b/include/State.h @@ -24,7 +24,7 @@ class State { bool const operator!=(const State &other); std::vector* getBasis(); std::vector* getRemaining(); - std::vector* getTotal(); + std::vector getTotal(); bool containsRule(ParseRule* rule); void addRuleCombineLookahead(ParseRule* rule); std::string toString(); @@ -40,8 +40,7 @@ class State { std::vector remaining; private: std::vector parents; - std::vector total; int number; }; -#endif \ No newline at end of file +#endif diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 2127389..e8dcb48 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -21,7 +21,7 @@ path_part = forward_slash alphanumeric | back_slash alphanumeric ; forward_slash = "/" ; back_slash = "\\" ; -# all for optional semicolons +# all for optional semicolons line_break = " +" ; actual_white = "( | )+" | line_break | line_break actual_white | "( | )+" actual_white ; diff --git a/main.cpp b/main.cpp index b12f34b..cb1d8d1 100644 --- a/main.cpp +++ b/main.cpp @@ -136,7 +136,7 @@ int main(int argc, char* argv[]) { std::cerr << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl; return -1; } - delete binaryTablePointer; + delete [] binaryTablePointer; } if (!compGramGood) { diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index 2b8996c..a8bc130 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -2,10 +2,9 @@ ParseRule::ParseRule() { pointerIndex = 0; - lookahead = NULL; } 
-ParseRule::ParseRule(Symbol leftHandle, int pointerIndex, std::vector &rightSide, std::vector* lookahead) { +ParseRule::ParseRule(Symbol leftHandle, int pointerIndex, std::vector &rightSide, std::vector lookahead) { this->leftHandle = leftHandle; this->pointerIndex = pointerIndex; this->rightSide = rightSide; @@ -21,7 +20,7 @@ const bool ParseRule::equalsExceptLookahead(const ParseRule &other) const { } const bool ParseRule::operator==(const ParseRule &other) const { - return(equalsExceptLookahead(other) && (lookahead == NULL ? other.lookahead == NULL : (*lookahead) == *(other.lookahead))); + return(equalsExceptLookahead(other) && (lookahead == other.lookahead)); } const bool ParseRule::operator!=(const ParseRule &other) const { @@ -35,22 +34,13 @@ const bool ParseRule::operator<(const ParseRule &other) const { if (rightSide != other.rightSide) return rightSide < other.rightSide; if (lookahead != other.lookahead) { - if (! (lookahead && other.lookahead)) { - return lookahead < other.lookahead; - } else { - return *lookahead < *(other.lookahead); - } + return lookahead < other.lookahead; } return false; } ParseRule* ParseRule::clone() { - std::vector* newLookahead = NULL; - if (lookahead) { - newLookahead = new std::vector(); - *newLookahead = *lookahead; - } - return( new ParseRule(leftHandle, pointerIndex, rightSide, newLookahead) ); + return( new ParseRule(leftHandle, pointerIndex, rightSide, lookahead) ); } void ParseRule::setLeftHandle(Symbol leftHandle) { @@ -105,25 +95,25 @@ bool ParseRule::isAtEnd() { return pointerIndex == rightSide.size(); } -void ParseRule::setLookahead(std::vector* lookahead) { +void ParseRule::setLookahead(std::vector lookahead) { this->lookahead = lookahead; } -void ParseRule::addLookahead(std::vector* lookahead) { - for (std::vector::size_type i = 0; i < lookahead->size(); i++) { +void ParseRule::addLookahead(std::vector lookahead) { + for (std::vector::size_type i = 0; i < lookahead.size(); i++) { bool alreadyIn = false; - for 
(std::vector::size_type j = 0; j < this->lookahead->size(); j++) { - if ((*lookahead)[i] == (*(this->lookahead))[j]) { + for (std::vector::size_type j = 0; j < this->lookahead.size(); j++) { + if (lookahead[i] == this->lookahead[j]) { alreadyIn = true; break; } } if (!alreadyIn) - this->lookahead->push_back((*lookahead)[i]); + this->lookahead.push_back(lookahead[i]); } } -std::vector* ParseRule::getLookahead() { +std::vector ParseRule::getLookahead() { return lookahead; } @@ -136,10 +126,10 @@ std::string ParseRule::toString(bool printLookahead) { } if (pointerIndex >= rightSide.size()) concat += "(*)"; - if (printLookahead && lookahead != NULL) { + if (printLookahead && lookahead.size()) { concat += "**"; - for (std::vector::size_type i = 0; i < lookahead->size(); i++) - concat += (*lookahead)[i].toString(); + for (std::vector::size_type i = 0; i < lookahead.size(); i++) + concat += lookahead[i].toString(); concat += "**"; } return(concat); diff --git a/src/Parser.cpp b/src/Parser.cpp index e32a9cf..c19d421 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -102,20 +102,20 @@ void Parser::createStateSet() { //Set the first state's basis to be the goal rule with lookahead EOF ParseRule* goalRule = loadedGrammer[0]->clone(); - std::vector* goalRuleLookahead = new std::vector(); - goalRuleLookahead->push_back(EOFSymbol); + std::vector goalRuleLookahead; + goalRuleLookahead.push_back(EOFSymbol); goalRule->setLookahead(goalRuleLookahead); State* zeroState = new State(0, goalRule); stateSets.push_back(zeroState); - std::queue* toDo = new std::queue(); - toDo->push(zeroState); + std::queue toDo; + toDo.push(zeroState); //std::cout << "Begining for main set for loop" << std::endl; - while (toDo->size()) { + while (toDo.size()) { //closure - closure(toDo->front()); + closure(toDo.front()); //Add the new states - addStates(&stateSets, toDo->front(), toDo); - toDo->pop(); + addStates(&stateSets, toDo.front(), &toDo); + toDo.pop(); } table.remove(1, EOFSymbol); } @@ -210,13 
+210,13 @@ bool Parser::isNullableHelper(Symbol token, std::set done) { } //Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. -std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { +std::vector Parser::incrementiveFollowSet(ParseRule* rule) { //Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end) rule = rule->clone(); rule->advancePointer(); //Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one - std::vector* followSet = new std::vector(); + std::vector followSet; std::vector symbolFirstSet; bool symbolFirstSetHasNull = true; while (symbolFirstSetHasNull && !rule->isAtEnd()) { @@ -229,34 +229,34 @@ std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { break; } } - followSet->insert(followSet->end(), symbolFirstSet.begin(), symbolFirstSet.end()); + followSet.insert(followSet.end(), symbolFirstSet.begin(), symbolFirstSet.end()); rule->advancePointer(); } if (rule->isAtEnd()) { - symbolFirstSet = *(rule->getLookahead()); - followSet->insert(followSet->end(), symbolFirstSet.begin(), symbolFirstSet.end()); + symbolFirstSet = rule->getLookahead(); + followSet.insert(followSet.end(), symbolFirstSet.begin(), symbolFirstSet.end()); } - std::vector* followSetReturn = new std::vector(); - for (std::vector::size_type i = 0; i < followSet->size(); i++) { + std::vector followSetReturn; + for (std::vector::size_type i = 0; i < followSet.size(); i++) { bool alreadyIn = false; - for (std::vector::size_type j = 0; j < followSetReturn->size(); j++) - if ((*followSet)[i] == (*followSetReturn)[j]) { + for (std::vector::size_type j = 0; j < followSetReturn.size(); j++) + if (followSet[i] == followSetReturn[j]) { alreadyIn = true; break; } if (!alreadyIn) - followSetReturn->push_back((*followSet)[i]); + followSetReturn.push_back(followSet[i]); } - 
delete followSet; + delete rule; return followSetReturn; } void Parser::closure(State* state) { //Add all the applicable rules. //std::cout << "Closure on " << state->toString() << " is" << std::endl; - std::vector* stateTotal = state->getTotal(); - for (std::vector::size_type i = 0; i < stateTotal->size(); i++) { - ParseRule* currentStateRule = (*stateTotal)[i]; + std::vector stateTotal = state->getTotal(); + for (std::vector::size_type i = 0; i < stateTotal.size(); i++) { + ParseRule* currentStateRule = stateTotal[i]; //If it's at it's end, move on. We can't advance it. if(currentStateRule->isAtEnd()) continue; @@ -271,10 +271,10 @@ void Parser::closure(State* state) { //Check to make sure not already in bool isAlreadyInState = false; - for (std::vector::size_type k = 0; k < stateTotal->size(); k++) { - if ((*stateTotal)[k]->equalsExceptLookahead(*currentGramRule)) { + for (std::vector::size_type k = 0; k < stateTotal.size(); k++) { + if (stateTotal[k]->equalsExceptLookahead(*currentGramRule)) { //std::cout << (*stateTotal)[k]->toString() << std::endl; - (*stateTotal)[k]->addLookahead(currentGramRule->getLookahead()); + stateTotal[k]->addLookahead(currentGramRule->getLookahead()); isAlreadyInState = true; delete currentGramRule; break; @@ -294,10 +294,10 @@ void Parser::closure(State* state) { void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queue* toDo) { std::vector< State* > newStates; //For each rule in the state we already have - std::vector* currStateTotal = state->getTotal(); - for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { + std::vector currStateTotal = state->getTotal(); + for (std::vector::size_type i = 0; i < currStateTotal.size(); i++) { //Clone the current rule - ParseRule* advancedRule = (*currStateTotal)[i]->clone(); + ParseRule* advancedRule = currStateTotal[i]->clone(); //Try to advance the pointer, if sucessful see if it is the correct next symbol if (advancedRule->advancePointer()) { 
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state @@ -324,16 +324,16 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu //Also add any completed rules as reduces in the action table //See if reduce //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... - std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); - if ((*currStateTotal)[i]->isAtEnd()) { - for (std::vector::size_type j = 0; j < lookahead->size(); j++) - table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); - } else if ((*currStateTotal)[i]->getAtNextIndex() == nullSymbol) { + std::vector lookahead = currStateTotal[i]->getLookahead(); + if (currStateTotal[i]->isAtEnd()) { + for (std::vector::size_type j = 0; j < lookahead.size(); j++) + table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i])); + } else if (currStateTotal[i]->getAtNextIndex() == nullSymbol) { //If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side of length 0. (so we don't pop off stack) - ParseRule* nullRule = (*currStateTotal)[i]->clone(); - nullRule->setRightSide(* new std::vector()); - for (std::vector::size_type j = 0; j < lookahead->size(); j++) - table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, nullRule)); + ParseRule* nullRule = currStateTotal[i]->clone(); + nullRule->setRightSide(std::vector()); + for (std::vector::size_type j = 0; j < lookahead.size(); j++) + table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, nullRule)); } } //Put all our new states in the set of states only if they're not already there. 
diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 4867b52..6e17ce1 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -334,10 +334,10 @@ void RNGLRParser::setPacked(NodeTree* node, bool isPacked) { void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std::queue* toDo) { std::vector< State* > newStates; //For each rule in the state we already have - std::vector* currStateTotal = state->getTotal(); - for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { + std::vector currStateTotal = state->getTotal(); + for (std::vector::size_type i = 0; i < currStateTotal.size(); i++) { //Clone the current rule - ParseRule* advancedRule = (*currStateTotal)[i]->clone(); + ParseRule* advancedRule = currStateTotal[i]->clone(); //Try to advance the pointer, if sucessful see if it is the correct next symbol if (advancedRule->advancePointer()) { //Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state @@ -397,24 +397,24 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std: void RNGLRParser::addStateReductionsToTable(State* state) { - std::vector* currStateTotal = state->getTotal(); + std::vector currStateTotal = state->getTotal(); //std::cout << currStateTotal->size() << "::" << state->getNumber() << std::endl; - for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { + for (std::vector::size_type i = 0; i < currStateTotal.size(); i++) { //See if reduce //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... 
- std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); - if ((*currStateTotal)[i]->isAtEnd()) { - for (std::vector::size_type j = 0; j < lookahead->size(); j++) { - table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + std::vector lookahead = currStateTotal[i]->getLookahead(); + if (currStateTotal[i]->isAtEnd()) { + for (std::vector::size_type j = 0; j < lookahead.size(); j++) { + table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i])); } //If this has an appropriate ruduction to null, get the reduce trees out - } else if (reducesToNull((*currStateTotal)[i])) { + } else if (reducesToNull(currStateTotal[i])) { //std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl; //It used to be that if is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side that is equal to //the part that we've already gone through in the rule. 
(so we don't pop extra off stack) //Now we use the same rule and make sure that the index location is used - for (std::vector::size_type j = 0; j < lookahead->size(); j++) - table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + for (std::vector::size_type j = 0; j < lookahead.size(); j++) + table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i])); } } } diff --git a/src/State.cpp b/src/State.cpp index fc8621b..d6bd453 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -25,10 +25,10 @@ const bool State::operator==(const State &other) { return false; } - if (remaining.size() != other.remaining.size()) + if (remaining.size() != other.remaining.size()) return false; for (std::vector< ParseRule* >::size_type i = 0; i < remaining.size(); i++) { - if ( *(remaining[i]) != *(other.remaining[i]) ) + if ( *(remaining[i]) != *(other.remaining[i]) ) return false; } return true; @@ -77,12 +77,11 @@ void State::combineStates(State &other) { addParents(other.getParents()); } -std::vector* State::getTotal() { - total.clear(); - //std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl; +std::vector State::getTotal() { + std::vector total; total.insert(total.begin(), basis.begin(), basis.end()); total.insert(total.end(), remaining.begin(), remaining.end()); - return(&total); + return total; } std::vector* State::getBasis() { return &basis; @@ -92,7 +91,7 @@ std::vector* State::getRemaining() { } bool State::containsRule(ParseRule* rule) { - getTotal(); + auto total = getTotal(); for (std::vector::size_type i = 0; i < total.size(); i++) { if (*rule == *(total[i])) { return true; @@ -102,7 +101,7 @@ bool State::containsRule(ParseRule* rule) { } void State::addRuleCombineLookahead(ParseRule* rule) { - getTotal(); + auto total = getTotal(); bool alreadyIn = false; for (std::vector::size_type i = 0; i < total.size(); i++) { if 
(rule->equalsExceptLookahead(*(total[i]))) { @@ -162,4 +161,4 @@ std::vector* State::getDeepParents(int depth) { int State::getNumber() { return number; -} \ No newline at end of file +} diff --git a/src/StringReader.cpp b/src/StringReader.cpp index f4b6e50..029e698 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -40,6 +40,8 @@ std::string StringReader::line(bool truncateEnd) std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd) { + if (str_pos >= rd_string.size()) + return ""; size_t found_pos = rd_string.find_first_of(stop_chars, str_pos); if (rd_string[str_pos] == '\"') { diff --git a/src/Table.cpp b/src/Table.cpp index e4cdfd4..7dd548f 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -190,7 +190,7 @@ void Table::importTable(char* tableData) { tableData += sizeof(bool); rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue)); } - reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL); + reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, std::vector()); } int shiftState = *((int*)tableData); tableData += sizeof(int);